PyPI - llguidance - Versions diffs - 0.7.9__tar.gz → 0.7.10__tar.gz - Mend

llguidance 0.7.9tar.gz → 0.7.10tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

{llguidance-0.7.9 → llguidance-0.7.10}/Cargo.lock RENAMED Viewed

@@ -1177,7 +1177,7 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
 [[package]]
 name = "llguidance"
-version = "0.7.9"
+version = "0.7.10"
 dependencies = [
  "anyhow",
  "derivre",
@@ -1196,7 +1196,7 @@ dependencies = [
 [[package]]
 name = "llguidance_py"
-version = "0.7.9"
+version = "0.7.10"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -2356,7 +2356,7 @@ dependencies = [
 [[package]]
 name = "toktrie"
-version = "0.7.9"
+version = "0.7.10"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -2367,7 +2367,7 @@ dependencies = [
 [[package]]
 name = "toktrie_hf_downloader"
-version = "0.7.9"
+version = "0.7.10"
 dependencies = [
  "anyhow",
  "hf-hub",
@@ -2378,7 +2378,7 @@ dependencies = [
 [[package]]
 name = "toktrie_hf_tokenizers"
-version = "0.7.9"
+version = "0.7.10"
 dependencies = [
  "anyhow",
  "log",

{llguidance-0.7.9 → llguidance-0.7.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llguidance
-Version: 0.7.9
+Version: 0.7.10
 License-File: LICENSE
 Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
 Author: Michal Moskal
@@ -18,6 +18,18 @@ Project-URL: issue_tracker, https://github.com/microsoft/llguidance/issues
     <em>Performance results from <a href ="https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench">MaskBench</a></em>
 </p>
+---
+* 2025-03-25 integration [merged](https://github.com/vllm-project/vllm/pull/14779) into vLLM (v0.8.2)
+* 2025-02-26 integration [merged](https://github.com/sgl-project/sglang/pull/3298) into SGLang (v0.4.4)
+* 2025-02-01 integration [merged](https://github.com/ggml-org/llama.cpp/pull/10224) into llama.cpp (b4613)
+* 2025-01-21 [JSONSchemaBench](https://github.com/guidance-ai/jsonschemabench) released, including [paper](https://arxiv.org/abs/2501.10868) and [MaskBench](https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench)
+* 2025-01-07 Guidance [v0.2.0](https://github.com/guidance-ai/guidance/releases/tag/0.2.0) released, using llguidance as the grammar engine
+---
+## About
 This library implements constrained decoding (also called constrained sampling or
 structured outputs) for Large Language Models (LLMs).
 It can enforce arbitrary context-free grammar on the output of LLM
@@ -52,7 +64,7 @@ The library is currently integrated in:
 - [SGLang](https://github.com/sgl-project/sglang/pull/3298) -
   use `--grammar-backend llguidance`; when passing Lark grammar make
   sure to prefix them with `%llguidance {}`, just as in llama.cpp
-- vLLM - [merged V0 PR](https://github.com/vllm-project/vllm/pull/14589) and [pending V1 PR](https://github.com/vllm-project/vllm/pull/14779)
+- vLLM - [V0 PR](https://github.com/vllm-project/vllm/pull/14589) and [V1 PR](https://github.com/vllm-project/vllm/pull/14779)
 - [LLGTRT](https://github.com/guidance-ai/llgtrt) - OpenAI-compatible REST server using NVIDIA's [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM)
 - [mistral.rs](https://github.com/EricLBuehler/mistral.rs/pull/899)

{llguidance-0.7.9 → llguidance-0.7.10}/README.md RENAMED Viewed

@@ -6,6 +6,18 @@
     <em>Performance results from <a href ="https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench">MaskBench</a></em>
 </p>
+---
+* 2025-03-25 integration [merged](https://github.com/vllm-project/vllm/pull/14779) into vLLM (v0.8.2)
+* 2025-02-26 integration [merged](https://github.com/sgl-project/sglang/pull/3298) into SGLang (v0.4.4)
+* 2025-02-01 integration [merged](https://github.com/ggml-org/llama.cpp/pull/10224) into llama.cpp (b4613)
+* 2025-01-21 [JSONSchemaBench](https://github.com/guidance-ai/jsonschemabench) released, including [paper](https://arxiv.org/abs/2501.10868) and [MaskBench](https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench)
+* 2025-01-07 Guidance [v0.2.0](https://github.com/guidance-ai/guidance/releases/tag/0.2.0) released, using llguidance as the grammar engine
+---
+## About
 This library implements constrained decoding (also called constrained sampling or
 structured outputs) for Large Language Models (LLMs).
 It can enforce arbitrary context-free grammar on the output of LLM
@@ -40,7 +52,7 @@ The library is currently integrated in:
 - [SGLang](https://github.com/sgl-project/sglang/pull/3298) -
   use `--grammar-backend llguidance`; when passing Lark grammar make
   sure to prefix them with `%llguidance {}`, just as in llama.cpp
-- vLLM - [merged V0 PR](https://github.com/vllm-project/vllm/pull/14589) and [pending V1 PR](https://github.com/vllm-project/vllm/pull/14779)
+- vLLM - [V0 PR](https://github.com/vllm-project/vllm/pull/14589) and [V1 PR](https://github.com/vllm-project/vllm/pull/14779)
 - [LLGTRT](https://github.com/guidance-ai/llgtrt) - OpenAI-compatible REST server using NVIDIA's [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM)
 - [mistral.rs](https://github.com/EricLBuehler/mistral.rs/pull/899)

{llguidance-0.7.9 → llguidance-0.7.10}/parser/Cargo.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "llguidance"
-version = "0.7.9"
+version = "0.7.10"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"

{llguidance-0.7.9 → llguidance-0.7.10}/parser/llguidance.h RENAMED Viewed

@@ -7,8 +7,26 @@
 #include <stdint.h>
 #include <stdlib.h>
+/**
+ * Do not include special tokens, and keep invalid UTF-8 as is.
+ */
+#define LLG_DECODE_NONE 0
+/**
+ * Include special tokens in the output.
+ * They may look like <|something|>, <something_else>, or <[12345]> if they don't have a name.
+ */
+#define LLG_DECODE_INCLUDE_SPECIAL 1
+/**
+ * Replace invalid UTF-8 with the replacement character.
+ */
+#define LLG_DECODE_VALID_UTF8 2
 typedef struct LlgConstraint LlgConstraint;
+typedef struct LlgMatcher LlgMatcher;
 typedef struct LlgStopController LlgStopController;
 typedef struct LlgTokenizer LlgTokenizer;
@@ -223,7 +241,7 @@ void llg_constraint_init_set_defaults(struct LlgConstraintInit *init,
  * Always returns a non-null value. Call llg_get_error() on the result to check for errors.
  */
 struct LlgConstraint *llg_new_constraint(const struct LlgConstraintInit *init,
-                                         const char *grammar_json);
+                                         const char *llguidance);
 /**
  * Create a new constraint from a given regular expression
@@ -291,8 +309,6 @@ int32_t llg_commit_token(struct LlgConstraint *cc, LlgToken token, struct LlgCom
 /**
  * Compute mask for several constraints in parallel.
- * # Safety
- * This function should only be called from C code.
  */
 void llg_par_compute_mask(const struct LlgConstraintStep *steps,
                           size_t n_steps,
@@ -321,8 +337,6 @@ struct LlgTokenizer *llg_clone_tokenizer(const struct LlgTokenizer *tok);
  * Tokenize the given bytes and return the tokens.
  * Always returns the number of tokens that would be written to output_tokens
  * if output_tokens_len was large enough.
- * # Safety
- * This function should only be called from C code.
  */
 size_t llg_tokenize_bytes(const struct LlgTokenizer *tok,
                           const uint8_t *bytes,
@@ -335,8 +349,6 @@ size_t llg_tokenize_bytes(const struct LlgTokenizer *tok,
  * Special tokens will be tokenized, if they follow 0xFF byte prefix.
  * Always returns the number of tokens that would be written to output_tokens
  * if output_tokens_len was large enough.
- * # Safety
- * This function should only be called from C code.
  */
 size_t llg_tokenize_bytes_marker(const struct LlgTokenizer *tok,
                                  const uint8_t *bytes,
@@ -346,10 +358,8 @@ size_t llg_tokenize_bytes_marker(const struct LlgTokenizer *tok,
 /**
  * Return a string representation of the tokens, useful for debugging.
- * The output is null-terminated.
+ * The output is NUL-terminated.
  * Returns the number of bytes that would be written to output if output_len was large enough.
- * # Safety
- * This function should only be called from C code.
  */
 size_t llg_stringify_tokens(const struct LlgTokenizer *tok,
                             const uint32_t *tokens,
@@ -357,17 +367,26 @@ size_t llg_stringify_tokens(const struct LlgTokenizer *tok,
                             char *output,
                             size_t output_len);
+/**
+ * Return a string representation of the tokens, useful for debugging.
+ * The output is NUL-terminated.
+ * Returns the number of bytes that would be written to output if output_len was large enough.
+ * flags is one of LLG_DECODE_*
+ */
+size_t llg_decode_tokens(const struct LlgTokenizer *tok,
+                         const uint32_t *tokens,
+                         size_t n_tokens,
+                         char *output,
+                         size_t output_len,
+                         uint32_t flags);
 /**
  * Free the tokenizer. Should *NOT* be called while there are still constraints using it.
- * # Safety
- * This function should only be called from C code.
  */
 void llg_free_tokenizer(struct LlgTokenizer *tok);
 /**
  * Free the constraint
- * # Safety
- * This function should only be called from C code.
  */
 void llg_free_constraint(struct LlgConstraint *cc);
@@ -381,8 +400,6 @@ const char *llg_flush_logs(struct LlgConstraint *cc);
 /**
  * Create a new stop-sequence controller
- * # Safety
- * This function should only be called from C code.
  */
 struct LlgStopController *llg_new_stop_controller(const struct LlgTokenizer *tokenizer,
                                                   const uint32_t *stop_tokens,
@@ -404,11 +421,132 @@ const char *llg_stop_commit_token(struct LlgStopController *stop_ctrl,
 /**
  * Free the stop-sequence controller
- * # Safety
- * This function should only be called from C code.
  */
 void llg_free_stop_controller(struct LlgStopController *stop_ctrl);
+/**
+ * Create a new matcher from the given ConstraintInit
+ * Always returns a non-null value. Call llg_matcher_get_error() on the result to check for errors.
+ * init.ff_tokens_ok and init.backtrack_ok are ignored
+ * (backtracking is always disabled, and ff_tokens can be retrieved using llg_matcher_compute_ff_tokens()).
+ * The data is of different format, depending on constraint_type:
+ * - "regex" - data is regular expression in rust regex format
+ *   see https://docs.rs/regex/latest/regex/#syntax
+ * - "json" or "json_schema" - data is (stringified) JSON schema
+ *   see https://github.com/guidance-ai/llguidance/blob/main/docs/json_schema.md
+ * - "json_object" - equivalent to JSON schema: {"type":"object"}
+ * - "lark" - data is grammar in a variant of Lark syntax
+ *   see https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md
+ * - "llguidance" or "guidance" - data is a list of Lark or JSON schemas in JSON format
+ */
+struct LlgMatcher *llg_new_matcher(const struct LlgConstraintInit *init,
+                                   const char *constraint_type,
+                                   const char *data);
+/**
+ * Compute the set of allowed tokens for the current state.
+ * The result is written to mask_dest.
+ * mask_byte_len must be equal to llg_matcher_get_mask_byte_size().
+ * Returns 0 on success and -1 on error.
+ */
+int32_t llg_matcher_compute_mask_into(struct LlgMatcher *matcher,
+                                      uint32_t *mask_dest,
+                                      size_t mask_byte_len);
+/**
+ * Compute the set of allowed tokens for the current state.
+ * The result is stored in the matcher; use llg_matcher_get_mask() to get a pointer to it.
+ * Returns 0 on success and -1 on error.
+ */
+int32_t llg_matcher_compute_mask(struct LlgMatcher *matcher);
+/**
+ * Return pointer to the mask computed by llg_matcher_compute_mask(), if any.
+ */
+const uint32_t *llg_matcher_get_mask(struct LlgMatcher *matcher);
+/**
+ * Return the size in bytes of the mask (the value expected as mask_byte_len by llg_matcher_compute_mask_into()).
+ */
+size_t llg_matcher_get_mask_byte_size(struct LlgMatcher *matcher);
+/**
+ * Advance the matcher by one token.
+ * Returns 0 on success and -1 on error.
+ */
+int32_t llg_matcher_consume_token(struct LlgMatcher *matcher, uint32_t token);
+/**
+ * Advance the matcher by several tokens.
+ * Returns 0 on success and -1 on error.
+ */
+int32_t llg_matcher_consume_tokens(struct LlgMatcher *matcher,
+                                   const uint32_t *tokens,
+                                   size_t n_tokens);
+/**
+ * Get the error message from the matcher or null if there is no error.
+ * After it returns a non-null value, it will always return it until the matcher is freed
+ * using llg_free_matcher() (at which point the pointer will be invalid).
+ */
+const char *llg_matcher_get_error(struct LlgMatcher *matcher);
+/**
+ * Check if the matcher is in an error state.
+ */
+bool llg_matcher_is_error(struct LlgMatcher *matcher);
+/**
+ * Free the matcher.
+ */
+void llg_free_matcher(struct LlgMatcher *matcher);
+/**
+ * Backtracks the matcher states by num_tokens.
+ * Returns 0 on success and -1 on error.
+ */
+int32_t llg_matcher_rollback(struct LlgMatcher *matcher, size_t num_tokens);
+/**
+ * Resets the matcher to the initial state.
+ * A matcher in error state cannot be reset.
+ * Returns 0 on success and -1 on error.
+ */
+int32_t llg_matcher_reset(struct LlgMatcher *matcher);
+/**
+ * Check if the grammar can fully accept the input.
+ */
+bool llg_matcher_is_accepting(struct LlgMatcher *matcher);
+/**
+ * Check if the matcher will force EOS token.
+ * This returns true also in error state, as that is a forced stop.
+ */
+bool llg_matcher_is_stopped(const struct LlgMatcher *matcher);
+/**
+ * Check how many tokens can be consumed from the given tokens.
+ * Returns the number of tokens that can be consumed, or -1 on error.
+ */
+int32_t llg_matcher_validate_tokens(struct LlgMatcher *matcher,
+                                    const uint32_t *tokens,
+                                    size_t n_tokens);
+/**
+ * Compute the fast-forward (forced) tokens for the current state.
+ * The result is written to output.
+ * Returns the number of tokens written to output (which can be 0) or -1 on error.
+ */
+int32_t llg_matcher_compute_ff_tokens(struct LlgMatcher *matcher,
+                                      uint32_t *output,
+                                      size_t output_len);
+/**
+ * Clone the matcher.
+ */
+struct LlgMatcher *llg_clone_matcher(const struct LlgMatcher *matcher);
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus

{llguidance-0.7.9 → llguidance-0.7.10}/parser/src/api.rs RENAMED Viewed

@@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
 use anyhow::{bail, Result};
 use derivre::RegexAst;
 use serde::{Deserialize, Serialize};
-use serde_json::Value;
+use serde_json::{json, Value};
 use crate::{
     earley::{lexerspec::LexerSpec, Grammar},
@@ -294,6 +294,26 @@ impl TopLevelGrammar {
             max_tokens: None,
         }
     }
+    /// The data is of different format, depending on tag:
+    /// - "regex" - data is regular expression in rust regex format
+    ///   see https://docs.rs/regex/latest/regex/#syntax
+    /// - "json" or "json_schema" - data is (stringified) JSON schema
+    ///   see https://github.com/guidance-ai/llguidance/blob/main/docs/json_schema.md
+    /// - "json_object" - equivalent to JSON schema: {"type":"object"}
+    /// - "lark" - data is grammar in a variant of Lark syntax
+    ///   see https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md
+    /// - "llguidance" or "guidance" - data is a list of Lark or JSON schemas in JSON format
+    pub fn from_tagged_str(tag: &str, data: &str) -> Result<Self> {
+        match tag {
+            "regex" => Ok(Self::from_regex(data)),
+            "json" | "json_schema" => Ok(Self::from_json_schema(serde_json::from_str(data)?)),
+            "json_object" => Ok(Self::from_json_schema(json!({"type": "object"}))),
+            "lark" => Ok(Self::from_lark(data.to_string())),
+            "llguidance" | "guidance" => Self::from_lark_or_grammar_list(data),
+            _ => bail!("unknown constraint type: {tag}"),
+        }
+    }
 }
 impl GrammarWithLexer {

llguidance 0.7.9__tar.gz → 0.7.10__tar.gz

llguidance 0.7.9tar.gz → 0.7.10tar.gz