PyPI - llama-cpp-python - Versions diffs - 0.2.15__tar.gz → 0.2.16__tar.gz - Mend

llama-cpp-python 0.2.15tar.gz → 0.2.16tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (523) hide show

llama_cpp_python-0.2.16/.git/FETCH_HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ b7e60b66f47950e385980a1329af9dfb14da6906 'b7e60b66f47950e385980a1329af9dfb14da6906' of https://github.com/abetlen/llama-cpp-python

llama_cpp_python-0.2.16/.git/HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ b7e60b66f47950e385980a1329af9dfb14da6906

{llama_cpp_python-0.2.15 → llama_cpp_python-0.2.16}/.git/config RENAMED Viewed

@@ -9,7 +9,7 @@
 [gc]
 	auto = 0
 [http "https://github.com/"]
-	extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzXzVCV2FnZFFjcjVieUJXTjZWTU5kcDhRd05QZVE5ZTFhTkp6Wg==
+	extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzXzJBc3h5aUVKQkZ1Q3M5bjVaWU1ZTk9za3hoSzh0VDFMeVdwRw==
 [submodule "vendor/llama.cpp"]
 	active = true
 	url = https://github.com/ggerganov/llama.cpp.git

llama_cpp_python-0.2.16/.git/index ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/logs/HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0000000000000000000000000000000000000000 b7e60b66f47950e385980a1329af9dfb14da6906 runner <runner@fv-az711-229.kxtiaivj4gxuxgxjt4etq45iac.phxx.internal.cloudapp.net> 1699615337 +0000 checkout: moving from master to refs/tags/v0.2.16

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ a75fa576abba9d37f463580c379e4bbf1e1ad03c

{llama_cpp_python-0.2.15 → llama_cpp_python-0.2.16}/.git/modules/vendor/llama.cpp/config RENAMED Viewed

@@ -13,7 +13,7 @@
 [gc]
 	auto = 0
 [http "https://github.com/"]
-	extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzXzVCV2FnZFFjcjVieUJXTjZWTU5kcDhRd05QZVE5ZTFhTkp6Wg==
+	extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzXzJBc3h5aUVKQkZ1Q3M5bjVaWU1ZTk9za3hoSzh0VDFMeVdwRw==
 [url "https://github.com/"]
 	insteadOf = git@github.com:
 	insteadOf = org-6826477@github.com:

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/index ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/logs/HEAD ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ 0000000000000000000000000000000000000000 a75fa576abba9d37f463580c379e4bbf1e1ad03c runner <runner@fv-az711-229.kxtiaivj4gxuxgxjt4etq45iac.phxx.internal.cloudapp.net> 1699615338 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
2	+ a75fa576abba9d37f463580c379e4bbf1e1ad03c a75fa576abba9d37f463580c379e4bbf1e1ad03c runner <runner@fv-az711-229.kxtiaivj4gxuxgxjt4etq45iac.phxx.internal.cloudapp.net> 1699615338 +0000 checkout: moving from master to a75fa576abba9d37f463580c379e4bbf1e1ad03c

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/logs/refs/heads/master ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0000000000000000000000000000000000000000 a75fa576abba9d37f463580c379e4bbf1e1ad03c runner <runner@fv-az711-229.kxtiaivj4gxuxgxjt4etq45iac.phxx.internal.cloudapp.net> 1699615338 +0000 clone: from https://github.com/ggerganov/llama.cpp.git

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0000000000000000000000000000000000000000 a75fa576abba9d37f463580c379e4bbf1e1ad03c runner <runner@fv-az711-229.kxtiaivj4gxuxgxjt4etq45iac.phxx.internal.cloudapp.net> 1699615338 +0000 clone: from https://github.com/ggerganov/llama.cpp.git

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/objects/pack/pack-e9e88c6e4829004ba3844e3ec02cda2d16322828.idx ADDED Viewed

Binary file

llama_cpp_python-0.2.15/.git/modules/vendor/llama.cpp/objects/pack/pack-a289e01376af5b952d8e50b0b34b637d8328146d.pack → llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/objects/pack/pack-e9e88c6e4829004ba3844e3ec02cda2d16322828.pack RENAMED Viewed

Binary file

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/objects/pack/pack-e9e88c6e4829004ba3844e3ec02cda2d16322828.rev ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/packed-refs ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # pack-refs with: peeled fully-peeled sorted
2	+ a75fa576abba9d37f463580c379e4bbf1e1ad03c refs/remotes/origin/master

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/refs/heads/master ADDED Viewed

	@@ -0,0 +1 @@
1	+ a75fa576abba9d37f463580c379e4bbf1e1ad03c

llama_cpp_python-0.2.16/.git/modules/vendor/llama.cpp/shallow ADDED Viewed

	@@ -0,0 +1 @@
1	+ a75fa576abba9d37f463580c379e4bbf1e1ad03c

llama_cpp_python-0.2.16/.git/objects/23/c7e86cace58018b34f1dae1b548df9981eebf9 ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/25/26bcbf5a89773bf179fd631c782274635da9e1 ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/2a/6aed81cf0cc6d59972fe184a57666f281dbe8f ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/2e/18b47a0261b4e81255fc71811a7c2405e4e19f ADDED Viewed

Binary file

llama_cpp_python-0.2.15/.git/objects/09/57742df65a6a0b326a9db8f57988ba0a57bf67 → llama_cpp_python-0.2.16/.git/objects/36/90f40c28d3d9821712c70f68a25f5671bfcaa8 RENAMED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/45/a1513dde96b5d7f0e3b3a49fc3d7bcda8f7c6f ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/4d/c32b015468696f721ddb37a53d09cf5f9c7612 ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/54/3365d8d631f36da2f57381801edabbc3ca4769 ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/5b/51e98ce432974ff031367f8937babe755e3d73 ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/61/027ef99725c50b0891fdbf0bf263a33abe648f ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/6c/3a6e594fab3a61940f00840cb717f53ea1e8b7 ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/7b/01670640a150525c7671a7a3c1ae652a2d7b3d ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/81/d58f627258591fc76e28e8378d0f9c3d49c9e5 ADDED Viewed

Binary file

llama_cpp_python-0.2.15/.git/objects/5a/5e05ec3a528958a1f9c0c0865b768e68d65c55 → llama_cpp_python-0.2.16/.git/objects/8e/841233c07f9d6be8b4bf1e25231789a84781c0 RENAMED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/a0/b7d5b55cf67870c3efc3e5c42b96196d1f707c ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/a2/4e55042fd63aeb7e9873fff7474cc9141f4474 ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/b7/e60b66f47950e385980a1329af9dfb14da6906 ADDED Viewed

@@ -0,0 +1,4 @@
+x��Q�0��)��miwKb�r��,JB����%���If�Қ�T�9>բ
+ܣ%F�(6`p!1��&YQ
+N��}3�M�.ƞ}3R����%�\�t$��&!
+����x,C�	:��.p��7��,�|Ik����h=�1 ����%�n�|���u1_1�FW

llama_cpp_python-0.2.16/.git/objects/e0/b98f7ec76339ad83913015531541a7de9d8e1e ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/e2/1e0bd82d6cacf620ea2f2dd7e8e7e2ee34b42a ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/e6/f024107b7e75246ba7a7b083b2aafaada82697 ADDED Viewed

Binary file

llama_cpp_python-0.2.15/.git/objects/aa/e61ce85bebc9f295eb5eb2eaef77a045db5d6b → llama_cpp_python-0.2.16/.git/objects/f1/76c95ddb207e422703d8a73dd0d12a984a838f RENAMED Viewed

Binary file

llama_cpp_python-0.2.16/.git/objects/f7/2b9b39ef1e5d433ac15638f1090b96c582eb5b ADDED Viewed

Binary file

llama_cpp_python-0.2.16/.git/refs/tags/v0.2.16 ADDED Viewed

	@@ -0,0 +1 @@
1	+ b7e60b66f47950e385980a1329af9dfb14da6906

llama_cpp_python-0.2.16/.git/shallow ADDED Viewed

	@@ -0,0 +1 @@
1	+ b7e60b66f47950e385980a1329af9dfb14da6906

{llama_cpp_python-0.2.15 → llama_cpp_python-0.2.16}/.github/workflows/build-and-release.yaml RENAMED Viewed

@@ -33,6 +33,9 @@ jobs:
       - name: Build wheels
         run: python -m cibuildwheel --output-dir wheelhouse
+        env:
+          # disable repair
+          CIBW_REPAIR_WHEEL_COMMAND: ""
       - uses: actions/upload-artifact@v3
         with:

{llama_cpp_python-0.2.15 → llama_cpp_python-0.2.16}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.2.16]
+- Update llama.cpp to ggerganov/llama.cpp@a75fa576abba9d37f463580c379e4bbf1e1ad03c
+- Add `set_seed` to `Llama` class by @abetlen in fd41ed3a908761d286102a019a34c2938a15118d
+- Fix server doc arguments by @kjunggithub in #892
+- Fix response_format handler in llava chat handler by @abetlen in b62c44983921197ed10a7d29dc4ba920e9979380
+- Fix default max_tokens, chat completion is now unlimited (to context length) and completion is 16 tokens to match OpenAI defaults by @abetlen in e7962d2c733cbbeec5a37392c81f64185a9a39e8
+- Fix json_schema_to_gbnf helper so that it takes a json schema string as input instead by @abetlen in faeae181b1e868643c0dc28fcf039f077baf0829
+- Add support for $ref and $def in json_schema_to_gbnf to handle more complex function schemas by @abetlen in 770df344369c0630df1be14be9f9e301e7c56d24
+- Update functionary chat handler for new OpenAI API by @abetlen in 1b376c62b775b401653facf25a519d116aafe99a
+- Fix missing default stop sequence in chatml chat format by @abetlen in b84d76a844149216d511cfd8cdb9827148a1853c
+- Fix sampling bug when logits_all=False by @abetlen in 6f0b0b1b840af846938ed74d0e8170a91c40e617
 ## [0.2.15]
 - Update llama.cpp to ggerganov/llama.cpp@0a7c980b6f94a049cb804573df2d8092a34df8e4

{llama_cpp_python-0.2.15 → llama_cpp_python-0.2.16}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama_cpp_python
-Version: 0.2.15
+Version: 0.2.16
 Summary: Python bindings for the llama.cpp library
 Author-Email: Andrei Betlen <abetlen@gmail.com>
 License: MIT

{llama_cpp_python-0.2.15 → llama_cpp_python-0.2.16}/docs/server.md RENAMED Viewed

@@ -34,6 +34,35 @@ NOTE: All server options are also available as environment variables. For exampl
 ## Guides
+### Code Completion
+`llama-cpp-python` supports code completion via GitHub Copilot.
+*NOTE*: Without GPU acceleration this is unlikely to be fast enough to be usable.
+You'll first need to download one of the available code completion models in GGUF format:
+- [replit-code-v1_5-GGUF](https://huggingface.co/abetlen/replit-code-v1_5-3b-GGUF)
+Then you'll need to run the OpenAI compatible web server with a substantially increased context size for GitHub Copilot requests:
+```bash
+python3 -m llama_cpp.server --model <model_path> --n_ctx 16192
+```
+Then just update your settings in `.vscode/settings.json` to point to your code completion server:
+```json
+{
+    // ...
+    "github.copilot.advanced": {
+        "debug.testOverrideProxyUrl": "http://<host>:<port>",
+        "debug.overrideProxyUrl": "http://<host>:<port>"
+    }
+    // ...
+}
+```
 ### Function Calling
 `llama-cpp-python` supports structured function calling based on a JSON schema.
@@ -45,7 +74,7 @@ You'll first need to download one of the available function calling models in GG
 Then when you run the server you'll need to also specify the `functionary-7b-v1` chat_format
 ```bash
-python3 -m llama_cpp.server --model <model_path> --chat-format functionary
+python3 -m llama_cpp.server --model <model_path> --chat_format functionary
 ```
 ### Multimodal Models
@@ -57,11 +86,12 @@ You'll first need to download one of the available multi-modal models in GGUF fo
 - [llava-v1.5-7b](https://huggingface.co/mys/ggml_llava-v1.5-7b)
 - [llava-v1.5-13b](https://huggingface.co/mys/ggml_llava-v1.5-13b)
+- [bakllava-1-7b](https://huggingface.co/mys/ggml_bakllava-1)
 Then when you run the server you'll need to also specify the path to the clip model used for image embedding and the `llava-1-5` chat_format
 ```bash
-python3 -m llama_cpp.server --model <model_path> --clip-model-path <clip_model_path> --chat-format llava-1-5
+python3 -m llama_cpp.server --model <model_path> --clip_model_path <clip_model_path> --chat_format llava-1-5
 ```
 Then you can just use the OpenAI API as normal

llama_cpp_python-0.2.16/examples/notebooks/Functions.ipynb ADDED Viewed

@@ -0,0 +1,400 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ChatCompletion(id='chatcmpl-b6dcbb47-1120-4761-8cd9-83542c97647b', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content=\"The current temperature in San Francisco is 72 degrees Fahrenheit. It's a sunny day with clear skies, making it perfect for outdoor activities.\\n \", role='assistant', function_call=None, tool_calls=None))], created=1699602158, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=38, prompt_tokens=135, total_tokens=173))\n"
+     ]
+    }
+   ],
+   "source": [
+    "import openai\n",
+    "import json\n",
+    "\n",
+    "\n",
+    "client = openai.OpenAI(\n",
+    "    api_key = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\", # can be anything\n",
+    "    base_url = \"http://100.64.159.73:8000/v1\"\n",
+    ")\n",
+    "\n",
+    "# Example dummy function hard coded to return the same weather\n",
+    "# In production, this could be your backend API or an external API\n",
+    "def get_current_weather(location, unit=\"fahrenheit\"):\n",
+    "    \"\"\"Get the current weather in a given location\"\"\"\n",
+    "    if \"tokyo\" in location.lower():\n",
+    "        return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n",
+    "    elif \"san francisco\" in location.lower():\n",
+    "        return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n",
+    "    elif \"paris\" in location.lower():\n",
+    "        return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n",
+    "    else:\n",
+    "        return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n",
+    "\n",
+    "def run_conversation():\n",
+    "    # Step 1: send the conversation and available functions to the model\n",
+    "    messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\"}]\n",
+    "    tools = [\n",
+    "        {\n",
+    "            \"type\": \"function\",\n",
+    "            \"function\": {\n",
+    "                \"name\": \"get_current_weather\",\n",
+    "                \"description\": \"Get the current weather in a given location\",\n",
+    "                \"parameters\": {\n",
+    "                    \"type\": \"object\",\n",
+    "                    \"properties\": {\n",
+    "                        \"location\": {\n",
+    "                            \"type\": \"string\",\n",
+    "                            \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
+    "                        },\n",
+    "                        \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
+    "                    },\n",
+    "                    \"required\": [\"location\"],\n",
+    "                },\n",
+    "            },\n",
+    "        }\n",
+    "    ]\n",
+    "    response = client.chat.completions.create(\n",
+    "        model=\"gpt-3.5-turbo-1106\",\n",
+    "        messages=messages,\n",
+    "        tools=tools,\n",
+    "        tool_choice=\"auto\",  # auto is default, but we'll be explicit\n",
+    "    )\n",
+    "    response_message = response.choices[0].message\n",
+    "    tool_calls = response_message.tool_calls\n",
+    "    # Step 2: check if the model wanted to call a function\n",
+    "    if tool_calls:\n",
+    "        # Step 3: call the function\n",
+    "        # Note: the JSON response may not always be valid; be sure to handle errors\n",
+    "        available_functions = {\n",
+    "            \"get_current_weather\": get_current_weather,\n",
+    "        }  # only one function in this example, but you can have multiple\n",
+    "        messages.append(response_message)  # extend conversation with assistant's reply\n",
+    "        # Step 4: send the info for each function call and function response to the model\n",
+    "        for tool_call in tool_calls:\n",
+    "            function_name = tool_call.function.name\n",
+    "            function_to_call = available_functions[function_name]\n",
+    "            function_args = json.loads(tool_call.function.arguments)\n",
+    "            function_response = function_to_call(\n",
+    "                location=function_args.get(\"location\"),\n",
+    "                unit=function_args.get(\"unit\"),\n",
+    "            )\n",
+    "            messages.append(\n",
+    "                {\n",
+    "                    \"tool_call_id\": tool_call.id,\n",
+    "                    \"role\": \"tool\",\n",
+    "                    \"name\": function_name,\n",
+    "                    \"content\": function_response,\n",
+    "                }\n",
+    "            )  # extend conversation with function response\n",
+    "        second_response = client.chat.completions.create(\n",
+    "            model=\"gpt-3.5-turbo-1106\",\n",
+    "            messages=messages,\n",
+    "        )  # get a new response from the model where it can see the function response\n",
+    "        return second_response\n",
+    "print(run_conversation())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "name='Jason' age=25\n"
+     ]
+    }
+   ],
+   "source": [
+    "import instructor\n",
+    "from pydantic import BaseModel\n",
+    "\n",
+    "# Enables `response_model`\n",
+    "client = instructor.patch(client=client)\n",
+    "\n",
+    "class UserDetail(BaseModel):\n",
+    "    name: str\n",
+    "    age: int\n",
+    "\n",
+    "user = client.chat.completions.create(\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    "    response_model=UserDetail,\n",
+    "    messages=[\n",
+    "        {\"role\": \"user\", \"content\": \"Extract Jason is 25 years old\"},\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "assert isinstance(user, UserDetail)\n",
+    "assert user.name == \"Jason\"\n",
+    "assert user.age == 25\n",
+    "\n",
+    "print(user)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import enum\n",
+    "\n",
+    "class Labels(str, enum.Enum):\n",
+    "    \"\"\"Enumeration for single-label text classification.\"\"\"\n",
+    "    SPAM = \"spam\"\n",
+    "    NOT_SPAM = \"not_spam\"\n",
+    "\n",
+    "class SinglePrediction(BaseModel):\n",
+    "    \"\"\"\n",
+    "    Class for a single class label prediction.\n",
+    "    \"\"\"\n",
+    "    class_label: Labels\n",
+    "\n",
+    "def classify(data: str) -> SinglePrediction:\n",
+    "    \"\"\"Perform single-label classification on the input text.\"\"\"\n",
+    "    return client.chat.completions.create(\n",
+    "        model=\"gpt-3.5-turbo-0613\",\n",
+    "        response_model=SinglePrediction,\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": f\"Classify the following text: {data}\",\n",
+    "            },\n",
+    "        ],\n",
+    "    )  # type: ignore\n",
+    "\n",
+    "prediction = classify(\"Hello there I'm a Nigerian prince and I want to give you money\")\n",
+    "assert prediction.class_label == Labels.SPAM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "class_labels=[<MultiLabels.BILLING: 'billing'>, <MultiLabels.TECH_ISSUE: 'tech_issue'>]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from typing import List\n",
+    "\n",
+    "# Define Enum class for multiple labels\n",
+    "class MultiLabels(str, enum.Enum):\n",
+    "    TECH_ISSUE = \"tech_issue\"\n",
+    "    BILLING = \"billing\"\n",
+    "    GENERAL_QUERY = \"general_query\"\n",
+    "\n",
+    "# Define the multi-class prediction model\n",
+    "class MultiClassPrediction(BaseModel):\n",
+    "    \"\"\"\n",
+    "    Class for a multi-class label prediction.\n",
+    "    \"\"\"\n",
+    "    class_labels: List[MultiLabels]\n",
+    "\n",
+    "def multi_classify(data: str) -> MultiClassPrediction:\n",
+    "    \"\"\"Perform multi-label classification on the input text.\"\"\"\n",
+    "    return client.chat.completions.create(\n",
+    "        model=\"gpt-3.5-turbo-0613\",\n",
+    "        response_model=MultiClassPrediction,\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": f\"Classify the following support ticket: {data}\",\n",
+    "            },\n",
+    "        ],\n",
+    "    )  # type: ignore\n",
+    "\n",
+    "# Test multi-label classification\n",
+    "ticket = \"My account is locked and I can't access my billing info.\"\n",
+    "prediction = multi_classify(ticket)\n",
+    "print(prediction)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "question='What is the meaning of life?' answer='The meaning of life, according to the Devil, is to live a life of sin and debauchery.'\n"
+     ]
+    }
+   ],
+   "source": [
+    "from typing_extensions import Annotated\n",
+    "from pydantic import BaseModel, BeforeValidator\n",
+    "\n",
+    "from instructor import llm_validator\n",
+    "\n",
+    "\n",
+    "question = \"What is the meaning of life?\"\n",
+    "context = \"The according to the devil the meaning of live is to live a life of sin and debauchery.\"\n",
+    "\n",
+    "class QuestionAnswer(BaseModel):\n",
+    "    question: str\n",
+    "    answer: str\n",
+    "\n",
+    "qa: QuestionAnswer = client.chat.completions.create(\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    "    response_model=QuestionAnswer,\n",
+    "    messages=[\n",
+    "        {\n",
+    "            \"role\": \"system\",\n",
+    "            \"content\": \"You are a system that answers questions based on the context. answer exactly what the question asks using the context.\",\n",
+    "        },\n",
+    "        {\n",
+    "            \"role\": \"user\",\n",
+    "            \"content\": f\"using the context: {context}\\n\\nAnswer the following question: {question}\",\n",
+    "        },\n",
+    "    ],\n",
+    ")\n",
+    "print(qa)\n",
+    "\n",
+    "class QuestionAnswerNoEvil(BaseModel):\n",
+    "    question: str\n",
+    "    answer: Annotated[\n",
+    "        str,\n",
+    "        BeforeValidator(\n",
+    "            llm_validator(\"don't say objectionable things\", allow_override=True)\n",
+    "        ),\n",
+    "    ]\n",
+    "\n",
+    "try:\n",
+    "    qa: QuestionAnswerNoEvil = client.chat.completions.create(\n",
+    "        model=\"gpt-3.5-turbo\",\n",
+    "        response_model=QuestionAnswerNoEvil,\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"system\",\n",
+    "                \"content\": \"You are a system that answers questions based on the context. answer exactly what the question asks using the context.\",\n",
+    "            },\n",
+    "            {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": f\"using the context: {context}\\n\\nAnswer the following question: {question}\",\n",
+    "            },\n",
+    "        ],\n",
+    "    )\n",
+    "except Exception as e:\n",
+    "    print(e)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "question='What did the author do during college?' answer=[Fact(fact='The author, Jason Liu, studied Computational Mathematics and Physics in university.', substring_quote=['Computational Mathematics'])]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import re\n",
+    "from typing import List\n",
+    "\n",
+    "from pydantic import Field, BaseModel, model_validator, FieldValidationInfo\n",
+    "\n",
+    "class Fact(BaseModel):\n",
+    "    fact: str = Field(...)\n",
+    "    substring_quote: List[str] = Field(...)\n",
+    "\n",
+    "    @model_validator(mode=\"after\")\n",
+    "    def validate_sources(self, info: FieldValidationInfo) -> \"Fact\":\n",
+    "        text_chunks = info.context.get(\"text_chunk\", None)\n",
+    "        spans = list(self.get_spans(text_chunks))\n",
+    "        self.substring_quote = [text_chunks[span[0] : span[1]] for span in spans]\n",
+    "        return self\n",
+    "\n",
+    "    def get_spans(self, context):\n",
+    "        for quote in self.substring_quote:\n",
+    "            yield from self._get_span(quote, context)\n",
+    "\n",
+    "    def _get_span(self, quote, context):\n",
+    "        for match in re.finditer(re.escape(quote), context):\n",
+    "            yield match.span()\n",
+    "\n",
+    "class QuestionAnswer(BaseModel):\n",
+    "    question: str = Field(...)\n",
+    "    answer: List[Fact] = Field(...)\n",
+    "\n",
+    "    @model_validator(mode=\"after\")\n",
+    "    def validate_sources(self) -> \"QuestionAnswer\":\n",
+    "        self.answer = [fact for fact in self.answer if len(fact.substring_quote) > 0]\n",
+    "        return self\n",
+    "\n",
+    "\n",
+    "def ask_ai(question: str, context: str) -> QuestionAnswer:\n",
+    "    return client.chat.completions.create(\n",
+    "        model=\"gpt-3.5-turbo-0613\",\n",
+    "        temperature=0.0,\n",
+    "        response_model=QuestionAnswer,\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": \"You are a world class algorithm to answer questions with correct and exact citations.\"},\n",
+    "            {\"role\": \"user\", \"content\": f\"{context}\"},\n",
+    "            {\"role\": \"user\", \"content\": f\"Question: {question}\"}\n",
+    "        ],\n",
+    "        validation_context={\"text_chunk\": context},\n",
+    "    )\n",
+    "\n",
+    "question = \"What did the author do during college?\"\n",
+    "context = \"\"\"\n",
+    "My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.\n",
+    "I went to an arts high school but in university I studied Computational Mathematics and physics.\n",
+    "As part of coop I worked at many companies including Stitchfix, Facebook.\n",
+    "I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\n",
+    "\"\"\"\n",
+    "\n",
+    "qa = ask_ai(question, context)\n",
+    "print(qa)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python-3.8.10",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5+"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

{llama_cpp_python-0.2.15 → llama_cpp_python-0.2.16}/llama_cpp/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
-__version__ = "0.2.15"
+__version__ = "0.2.16"

llama-cpp-python 0.2.15__tar.gz → 0.2.16__tar.gz

llama-cpp-python 0.2.15tar.gz → 0.2.16tar.gz