PyPI - chaine - Versions diffs - 3.13.1__cp311-cp311-macosx_11_0_arm64.whl - Mend

chaine 3.13.1__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of chaine might be problematic. Click here for more details.

Files changed (68) hide show

chaine/__init__.py +2 -0
chaine/_core/crf.cpp +19854 -0
chaine/_core/crf.cpython-311-darwin.so +0 -0
chaine/_core/crf.pyx +271 -0
chaine/_core/crfsuite/COPYING +27 -0
chaine/_core/crfsuite/README +183 -0
chaine/_core/crfsuite/include/crfsuite.h +1077 -0
chaine/_core/crfsuite/include/crfsuite.hpp +649 -0
chaine/_core/crfsuite/include/crfsuite_api.hpp +406 -0
chaine/_core/crfsuite/include/os.h +65 -0
chaine/_core/crfsuite/lib/cqdb/COPYING +28 -0
chaine/_core/crfsuite/lib/cqdb/include/cqdb.h +518 -0
chaine/_core/crfsuite/lib/cqdb/src/cqdb.c +639 -0
chaine/_core/crfsuite/lib/cqdb/src/lookup3.c +1271 -0
chaine/_core/crfsuite/lib/cqdb/src/main.c +184 -0
chaine/_core/crfsuite/lib/crf/src/crf1d.h +354 -0
chaine/_core/crfsuite/lib/crf/src/crf1d_context.c +788 -0
chaine/_core/crfsuite/lib/crf/src/crf1d_encode.c +1020 -0
chaine/_core/crfsuite/lib/crf/src/crf1d_feature.c +382 -0
chaine/_core/crfsuite/lib/crf/src/crf1d_model.c +1085 -0
chaine/_core/crfsuite/lib/crf/src/crf1d_tag.c +582 -0
chaine/_core/crfsuite/lib/crf/src/crfsuite.c +500 -0
chaine/_core/crfsuite/lib/crf/src/crfsuite_internal.h +233 -0
chaine/_core/crfsuite/lib/crf/src/crfsuite_train.c +302 -0
chaine/_core/crfsuite/lib/crf/src/dataset.c +115 -0
chaine/_core/crfsuite/lib/crf/src/dictionary.c +127 -0
chaine/_core/crfsuite/lib/crf/src/holdout.c +83 -0
chaine/_core/crfsuite/lib/crf/src/json.c +1497 -0
chaine/_core/crfsuite/lib/crf/src/json.h +120 -0
chaine/_core/crfsuite/lib/crf/src/logging.c +85 -0
chaine/_core/crfsuite/lib/crf/src/logging.h +49 -0
chaine/_core/crfsuite/lib/crf/src/params.c +370 -0
chaine/_core/crfsuite/lib/crf/src/params.h +84 -0
chaine/_core/crfsuite/lib/crf/src/quark.c +180 -0
chaine/_core/crfsuite/lib/crf/src/quark.h +46 -0
chaine/_core/crfsuite/lib/crf/src/rumavl.c +1178 -0
chaine/_core/crfsuite/lib/crf/src/rumavl.h +144 -0
chaine/_core/crfsuite/lib/crf/src/train_arow.c +409 -0
chaine/_core/crfsuite/lib/crf/src/train_averaged_perceptron.c +237 -0
chaine/_core/crfsuite/lib/crf/src/train_l2sgd.c +491 -0
chaine/_core/crfsuite/lib/crf/src/train_lbfgs.c +323 -0
chaine/_core/crfsuite/lib/crf/src/train_passive_aggressive.c +442 -0
chaine/_core/crfsuite/lib/crf/src/vecmath.h +360 -0
chaine/_core/crfsuite/swig/crfsuite.cpp +1 -0
chaine/_core/crfsuite_api.pxd +67 -0
chaine/_core/liblbfgs/COPYING +22 -0
chaine/_core/liblbfgs/README +71 -0
chaine/_core/liblbfgs/include/lbfgs.h +745 -0
chaine/_core/liblbfgs/lib/arithmetic_ansi.h +142 -0
chaine/_core/liblbfgs/lib/arithmetic_sse_double.h +303 -0
chaine/_core/liblbfgs/lib/arithmetic_sse_float.h +312 -0
chaine/_core/liblbfgs/lib/lbfgs.c +1531 -0
chaine/_core/tagger_wrapper.hpp +58 -0
chaine/_core/trainer_wrapper.cpp +32 -0
chaine/_core/trainer_wrapper.hpp +26 -0
chaine/crf.py +505 -0
chaine/logging.py +214 -0
chaine/optimization/__init__.py +10 -0
chaine/optimization/metrics.py +129 -0
chaine/optimization/spaces.py +394 -0
chaine/optimization/trial.py +103 -0
chaine/optimization/utils.py +119 -0
chaine/training.py +184 -0
chaine/typing.py +18 -0
chaine/validation.py +43 -0
chaine-3.13.1.dist-info/METADATA +348 -0
chaine-3.13.1.dist-info/RECORD +68 -0
chaine-3.13.1.dist-info/WHEEL +5 -0

chaine/training.py ADDED Viewed

@@ -0,0 +1,184 @@
+"""
+chaine.training
+~~~~~~~~~~~~~~~
+This module implements the high-level API to train a conditional random field.
+"""
+from chaine.crf import HyperparameterOptimizer, Model, Trainer
+from chaine.logging import Logger, set_verbosity
+from chaine.typing import Filepath, Iterable, Labels, Sequence
+# Module-level logger named after this module; train() uses it to emit warnings.
+LOGGER = Logger(__name__)
+def train(
+    dataset: Iterable[Sequence],
+    labels: Iterable[Labels],
+    *,
+    model_filepath: Filepath = "model.chaine",
+    optimize_hyperparameters: bool = False,
+    optimization_sample_size: int | None = None,
+    verbose: int = 1,
+    **hyperparameters,
+) -> Model:
+    """Train a conditional random field.
+    Parameters
+    ----------
+    dataset : Iterable[Sequence]
+        Data set consisting of sequences of feature sets.
+    labels : Iterable[Labels]
+        Labels corresponding to each instance in the data set.
+    model_filepath : Filepath, optional (default=model.chaine)
+        Path to model location.
+    optimize_hyperparameters : bool
+        If True, optimize hyperparameters first.
+    optimization_sample_size : int | None
+        Number of instances to sample from the data set for hyperparameter optimization.
+    verbose : int
+        Controls the verbosity: the higher, the more messages.
+    algorithm : str
+        The following optimization algorithms are available:
+            * lbfgs: Limited-memory BFGS with L1/L2 regularization
+            * l2sgd: Stochastic gradient descent with L2 regularization
+            * ap: Averaged perceptron
+            * pa: Passive aggressive
+            * arow: Adaptive regularization of weights
+    Limited-memory BFGS Parameters (lbfgs)
+    --------------------------------------
+    min_freq : float, optional (default=0)
+        Threshold value for minimum frequency of a feature occurring in training data.
+    all_possible_states : bool, optional (default=False)
+        Generate state features that do not even occur in the training data.
+    all_possible_transitions : bool, optional (default=False)
+        Generate transition features that do not even occur in the training data.
+    max_iterations : int, optional (default=None)
+        Maximum number of iterations (unlimited by default).
+    num_memories : int, optional (default=6)
+        Number of limited memories for approximating the inverse hessian matrix.
+    c1 : float, optional (default=0)
+        Coefficient for L1 regularization.
+    c2 : float, optional (default=1.0)
+        Coefficient for L2 regularization.
+    epsilon : float, optional (default=1e-5)
+        Parameter that determines the condition of convergence.
+    period : int, optional (default=10)
+        Threshold value for iterations to test the stopping criterion.
+    delta : float, optional (default=1e-5)
+        Top iteration when log likelihood is not greater than this.
+    linesearch : str, optional (default="MoreThuente")
+        Line search algorithm used in updates:
+            * MoreThuente: More and Thuente's method
+            * Backtracking: Backtracking method with regular Wolfe condition
+            * StrongBacktracking: Backtracking method with strong Wolfe condition
+    max_linesearch : int, optional (default=20)
+        Maximum number of trials for the line search algorithm.
+    SGD with L2 Parameters (l2sgd)
+    ------------------------------
+    min_freq : float, optional (default=0)
+        Threshold value for minimum frequency of a feature occurring in training data.
+    all_possible_states : bool, optional (default=False)
+        Generate state features that do not even occur in the training data.
+    all_possible_transitions : bool, optional (default=False)
+        Generate transition features that do not even occur in the training data.
+    max_iterations : int, optional (default=None)
+        Maximum number of iterations (1000 by default).
+    c2 : float, optional (default=1.0)
+        Coefficient for L2 regularization.
+    period : int, optional (default=10)
+        Threshold value for iterations to test the stopping criterion.
+    delta : float, optional (default=1e-5)
+        Top iteration when log likelihood is not greater than this.
+    calibration_eta : float, optional (default=0.1)
+        Initial value of learning rate (eta) used for calibration.
+    calibration_rate : float, optional (default=2.0)
+        Rate of increase/decrease of learning rate for calibration.
+    calibration_samples : int, optional (default=1000)
+        Number of instances used for calibration.
+    calibration_candidates : int, optional (default=10)
+        Number of candidates of learning rate.
+    calibration_max_trials : int, optional (default=20)
+        Maximum number of trials of learning rates for calibration.
+    Averaged Perceptron Parameters (ap)
+    -----------------------------------
+    min_freq : float, optional (default=0)
+        Threshold value for minimum frequency of a feature occurring in training data.
+    all_possible_states : bool, optional (default=False)
+        Generate state features that do not even occur in the training data.
+    all_possible_transitions : bool, optional (default=False)
+        Generate transition features that do not even occur in the training data.
+    max_iterations : int, optional (default=None)
+        Maximum number of iterations (100 by default).
+    epsilon : float, optional (default=1e-5)
+        Parameter that determines the condition of convergence.
+    Passive Aggressive Parameters (pa)
+    ----------------------------------
+    min_freq : float, optional (default=0)
+        Threshold value for minimum frequency of a feature occurring in training data.
+    all_possible_states : bool, optional (default=False)
+        Generate state features that do not even occur in the training data.
+    all_possible_transitions : bool, optional (default=False)
+        Generate transition features that do not even occur in the training data.
+    max_iterations : int, optional (default=None)
+        Maximum number of iterations (100 by default).
+    epsilon : float, optional (default=1e-5)
+        Parameter that determines the condition of convergence.
+    pa_type : int, optional (default=1)
+        Strategy for updating feature weights:
+            * 0: PA without slack variables
+            * 1: PA type I
+            * 2: PA type II
+    c : float, optional (default=1)
+        Aggressiveness parameter (used only for PA-I and PA-II).
+    error_sensitive : bool, optional (default=True)
+        Include square root of predicted incorrect labels into optimization routine.
+    averaging : bool, optional (default=True)
+        Compute average of feature weights at all updates.
+    Adaptive Regularization of Weights Parameters (arow)
+    ----------------------------------------------------
+    min_freq : float, optional (default=0)
+        Threshold value for minimum frequency of a feature occurring in training data.
+    all_possible_states : bool, optional (default=False)
+        Generate state features that do not even occur in the training data.
+    all_possible_transitions : bool, optional (default=False)
+        Generate transition features that do not even occur in the training data.
+    max_iterations : int, optional (default=None)
+        Maximum number of iterations (100 by default).
+    epsilon : float, optional (default=1e-5)
+        Parameter that determines the condition of convergence.
+    variance : float, optional (default=1)
+        Initial variance of every feature weight.
+    gamma : float, optional (default=1)
+        Trade-off between loss function and changes of feature weights.
+    Returns
+    -------
+    Model
+        A conditional random field trained on the dataset.
+    """
+    set_verbosity(verbose)
+    if optimize_hyperparameters:
+        if hyperparameters:
+            LOGGER.warning(f"Specified hyperparameters will be overwritten: {hyperparameters}")
+        # optionally tune hyperparameters first
+        optimizer = HyperparameterOptimizer()
+        results = optimizer.optimize_hyperparameters(dataset, labels, optimization_sample_size)
+        # use hyperparameters of the best run
+        hyperparameters = results[0]["hyperparameters"]
+    # initialize trainer and start training
+    trainer = Trainer(**hyperparameters)
+    trainer.train(dataset, labels, model_filepath=str(model_filepath))
+    # load and return the trained model
+    return Model(model_filepath)

chaine/typing.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""
+chaine.typing
+~~~~~~~~~~~~~
+A collection of type hints.
+"""
+from os import PathLike
+from pathlib import Path
+from typing import Any, Iterable, Iterator
+# One sequence: an iterable of dicts that map string keys to str, int, float or bool values.
+Sequence = Iterable[dict[str, str | int | float | bool]]
+# The labels of one sequence: an iterable of strings.
+Labels = Iterable[str]
+# Anything accepted as a path: pathlib.Path, os.PathLike or a plain string.
+Filepath = Path | PathLike | str
+# A list of strings; presumably the raw tokens of one sentence (confirm against usage).
+Sentence = list[str]
+# A list of strings; presumably one tag per token (confirm against usage).
+Tags = list[str]
+# A dict mapping string keys to float, int, str or bool values; presumably the features
+# of a single token (confirm against usage).
+Features = dict[str, float | int | str | bool]
+# A nested dict keyed by strings; the schema of the inner dicts is not fixed by this alias.
+Dataset = dict[str, dict[str, Any]]

chaine/validation.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""
+chaine.validation
+~~~~~~~~~~~~~~~~~
+This module implements functions to validate input sequences (either for training or inference).
+"""
+from chaine.typing import Sequence
+# Data types accepted as feature values by is_valid_token(). The check uses isinstance(),
+# so instances of subclasses of these types are accepted as well.
+TYPES = (str, int, float, bool)
+def is_valid_sequence(sequence: Sequence) -> bool:
+    """Check if the given sequence has valid input format.
+    Parameters
+    ----------
+    sequence : Sequence
+        Sequence to validate.
+    Returns
+    -------
+    bool
+        True if sequence is valid, False otherwise.
+    """
+    return isinstance(sequence, list) and all(is_valid_token(token) for token in sequence)
+def is_valid_token(token: dict) -> bool:
+    """Check if the given token has valid input format.
+    Parameters
+    ----------
+    token : dict
+        Token to validate.
+    Returns
+    -------
+    bool
+        True if sequence is valid, False otherwise.
+    """
+    return isinstance(token, dict) and all(isinstance(value, TYPES) for value in token.values())

chaine-3.13.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,348 @@
+Metadata-Version: 2.1
+Name: chaine
+Version: 3.13.1
+Summary: Linear-chain conditional random fields for natural language processing
+Author: Severin Simmler
+Author-email: s.simmler@snapaddy.com
+Requires-Python: >=3.10,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Description-Content-Type: text/markdown
+# Chaine
+[![downloads](https://static.pepy.tech/personalized-badge/chaine?period=total&units=international_system&left_color=black&right_color=black&left_text=downloads)](https://pepy.tech/project/chaine)
+[![downloads/month](https://static.pepy.tech/personalized-badge/chaine?period=month&units=abbreviation&left_color=black&right_color=black&left_text=downloads/month)](https://pepy.tech/project/chaine)
+[![downloads/week](https://static.pepy.tech/personalized-badge/chaine?period=week&units=abbreviation&left_color=black&right_color=black&left_text=downloads/week)](https://pepy.tech/project/chaine)
+Chaine is a modern, fast and lightweight Python library implementing **linear-chain conditional random fields**. Use it for sequence labeling tasks like [named entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition) or [part-of-speech tagging](https://en.wikipedia.org/wiki/Part-of-speech_tagging).
+The main goals of this project are:
+- **Usability**: Designed with special focus on usability and a beautiful high-level API.
+- **Efficiency**: Performance critical parts are written in C and thus [blazingly fast](http://www.chokkan.org/software/crfsuite/benchmark.html). Loading a model from disk and retrieving feature weights for inference is optimized for both [speed and memory](http://www.chokkan.org/software/cqdb/).
+- **Persistency**: No `pickle` or `joblib` is used for serialization. A trained model will be compatible with all versions for eternity, because the underlying C library will not change. I promise.
+- **Compatibility**: There are wheels for Linux, macOS and Windows. No compiler needed.
+- **Minimalism**: No code bloat, no external dependencies.
+Install the latest stable version from [PyPI](https://pypi.org/project/chaine):
+```
+pip install chaine
+```
+### Table of contents
+- [Algorithms](#algorithms)
+- [Usage](#usage)
+  - [Features](#features)
+  - [Training](#training)
+  - [Hyperparameters](#hyperparameters)
+  - [Inference](#inference)
+  - [Weights](#weights)
+- [Credits](#credits)
+## Algorithms
+You can train models using the following methods:
+- Limited-Memory BFGS ([Nocedal 1980](https://www.jstor.org/stable/2006193))
+- Orthant-Wise Limited-Memory Quasi-Newton ([Andrew et al. 2007](https://www.microsoft.com/en-us/research/publication/scalable-training-of-l1-regularized-log-linear-models/))
+- Stochastic Gradient Descent ([Shalev et al. 2007](https://www.google.com/url?q=https://www.cs.huji.ac.il/~shais/papers/ShalevSiSr07.pdf))
+- Averaged Perceptron ([Collins 2002](https://aclanthology.org/W02-1001.pdf))
+- Passive Aggressive ([Crammer et al. 2006](https://jmlr.csail.mit.edu/papers/v7/crammer06a.html))
+- Adaptive Regularization of Weight Vectors ([Mejer et al. 2010](https://aclanthology.org/D10-1095.pdf))
+Please refer to the paper by [Lafferty et al.](https://repository.upenn.edu/cgi/viewcontent.cgi?article=1162&context=cis_papers) for a general introduction to **conditional random fields** or the respective chapter in [Speech and Language Processing](https://web.stanford.edu/~jurafsky/slp3/8.pdf).
+## Usage
+Training and using a **conditional random field** for inference is as easy as:
+```python
+>>> import chaine
+>>> tokens = [[{"index": 0, "text": "John"}, {"index": 1, "text": "Lennon"}]]
+>>> labels = [["B-PER", "I-PER"]]
+>>> model = chaine.train(tokens, labels)
+>>> model.predict(tokens)
+[['B-PER', 'I-PER']]
+```
+> You can control verbosity with the argument `verbose`, where `0` will set the log level to `ERROR`, `1` to `INFO` (which is the default) and `2` to `DEBUG`.
+### Features
+One token in a sequence is represented as a dictionary with descriptive feature names as keys and respective values of type string, integer, float or boolean:
+```python
+{
+    "text": "John",
+    "num_characters": 4,
+    "relative_index": 0.0,
+    "is_number": False,
+}
+```
+One sequence is represented as a list of feature dictionaries:
+```python
+[
+    {"text": "John", "num_characters": 4},
+    {"text": "Lennon", "num_characters": 6}
+]
+```
+One data set is represented as an iterable of sequences, i.e. an iterable of lists of feature dictionaries:
+```python
+[
+    [
+        {"text": "John", "num_characters": 4},
+        {"text": "Lennon", "num_characters": 6}
+    ],
+    [
+        {"text": "Paul", "num_characters": 4},
+        {"text": "McCartney", "num_characters": 9}
+    ],
+    ...
+]
+```
+This is the expected input format for training. For inference, you can also process a single sequence rather than a batch of multiple sequences.
+#### Generators
+Depending on the size of your data set, it probably makes sense to use generators. Something like this would be totally fine for both training and inference:
+```python
+([extract_features(token) for token in tokens] for tokens in dataset)
+```
+Assuming `dataset` is a generator as well, only one sequence is loaded into memory at a time.
+### Training
+You can either use the high-level function to train a model (which also loads and returns it):
+```python
+>>> import chaine
+>>> chaine.train(tokens, labels)
+```
+or the lower-level `Trainer` class:
+```python
+>>> from chaine import Trainer
+>>> trainer = Trainer()
+```
+A `Trainer` object has a method `train()` to learn states and transitions from the given data set. You have to provide a filepath to serialize the model to:
+```python
+>>> trainer.train(tokens, labels, model_filepath="model.chaine")
+```
+### Hyperparameters
+Before training a model, you might want to find out the ideal hyperparameters first. You can just set the respective argument to `True`:
+```python
+>>> import chaine
+>>> model = chaine.train(tokens, labels, optimize_hyperparameters=True)
+```
+> This might be very memory and time consuming, because 5-fold cross validation for each of the 10 trials for each of the algorithms is performed.
+or use the `HyperparameterOptimizer` class and have more control over the optimization process:
+```python
+>>> from chaine import HyperparameterOptimizer
+>>> from chaine.optimization import L2SGDSearchSpace
+>>> optimizer = HyperparameterOptimizer(trials=50, folds=3, spaces=[L2SGDSearchSpace()])
+>>> optimizer.optimize_hyperparameters(tokens, labels, sample_size=1000)
+```
+This will make 50 trials with 3-fold cross validation for the Stochastic Gradient Descent algorithm and return a sorted list of hyperparameters with evaluation stats. The given data set is downsampled to 1000 instances.
+<details>
+<summary>Example of a hyperparameter optimization report</summary>
+```json
+[
+    {
+        "hyperparameters": {
+            "algorithm": "lbfgs",
+            "min_freq": 0,
+            "all_possible_states": true,
+            "all_possible_transitions": true,
+            "num_memories": 8,
+            "c1": 0.9,
+            "c2": 0.31,
+            "epsilon": 0.00011,
+            "period": 17,
+            "delta": 0.00051,
+            "linesearch": "Backtracking",
+            "max_linesearch": 31
+        },
+        "stats": {
+            "mean_precision": 0.4490952380952381,
+            "stdev_precision": 0.16497993418839532,
+            "mean_recall": 0.4554858934169279,
+            "stdev_recall": 0.20082402876210334,
+            "mean_f1": 0.45041435392087253,
+            "stdev_f1": 0.17914435056760908,
+            "mean_time": 0.3920876979827881,
+            "stdev_time": 0.0390961164333519
+        }
+    },
+    {
+        "hyperparameters": {
+            "algorithm": "lbfgs",
+            "min_freq": 5,
+            "all_possible_states": true,
+            "all_possible_transitions": false,
+            "num_memories": 9,
+            "c1": 1.74,
+            "c2": 0.09,
+            "epsilon": 0.0008600000000000001,
+            "period": 1,
+            "delta": 0.00045000000000000004,
+            "linesearch": "StrongBacktracking",
+            "max_linesearch": 34
+        },
+        "stats": {
+            "mean_precision": 0.4344436335328176,
+            "stdev_precision": 0.15542689556199216,
+            "mean_recall": 0.4385174258109041,
+            "stdev_recall": 0.19873733310765845,
+            "mean_f1": 0.43386496201052716,
+            "stdev_f1": 0.17225578421967264,
+            "mean_time": 0.12209572792053222,
+            "stdev_time": 0.0236177196325414
+        }
+    },
+    {
+        "hyperparameters": {
+            "algorithm": "lbfgs",
+            "min_freq": 2,
+            "all_possible_states": true,
+            "all_possible_transitions": true,
+            "num_memories": 1,
+            "c1": 0.91,
+            "c2": 0.4,
+            "epsilon": 0.0008400000000000001,
+            "period": 13,
+            "delta": 0.00018,
+            "linesearch": "MoreThuente",
+            "max_linesearch": 43
+        },
+        "stats": {
+            "mean_precision": 0.41963433149859447,
+            "stdev_precision": 0.16363544501259455,
+            "mean_recall": 0.4331173486012196,
+            "stdev_recall": 0.21344965207006913,
+            "mean_f1": 0.422038027332145,
+            "stdev_f1": 0.18245844823319127,
+            "mean_time": 0.2586916446685791,
+            "stdev_time": 0.04341208573100539
+        }
+    },
+    {
+        "hyperparameters": {
+            "algorithm": "l2sgd",
+            "min_freq": 5,
+            "all_possible_states": true,
+            "all_possible_transitions": true,
+            "c2": 1.68,
+            "period": 2,
+            "delta": 0.00047000000000000004,
+            "calibration_eta": 0.0006900000000000001,
+            "calibration_rate": 2.9000000000000004,
+            "calibration_samples": 1400,
+            "calibration_candidates": 25,
+            "calibration_max_trials": 23
+        },
+        "stats": {
+            "mean_precision": 0.2571428571428571,
+            "stdev_precision": 0.43330716823151716,
+            "mean_recall": 0.01,
+            "stdev_recall": 0.022360679774997897,
+            "mean_f1": 0.01702127659574468,
+            "stdev_f1": 0.038060731531911314,
+            "mean_time": 0.15442829132080077,
+            "stdev_time": 0.051750737506044905
+        }
+    }
+]
+```
+</details>
+### Inference
+The high-level function `chaine.train()` returns a `Model` object. You can load an already trained model from disk by initializing a `Model` object with the model's filepath:
+```python
+>>> from chaine import Model
+>>> model = Model("model.chaine")
+```
+You can predict labels for a batch of sequences:
+```python
+>>> tokens = [
+...     [{"index": 0, "text": "John"}, {"index": 1, "text": "Lennon"}],
+...     [{"index": 0, "text": "Paul"}, {"index": 1, "text": "McCartney"}],
+...     [{"index": 0, "text": "George"}, {"index": 1, "text": "Harrison"}],
+...     [{"index": 0, "text": "Ringo"}, {"index": 1, "text": "Starr"}]
+... ]
+>>> model.predict(tokens)
+[['B-PER', 'I-PER'], ['B-PER', 'I-PER'], ['B-PER', 'I-PER'], ['B-PER', 'I-PER']]
+```
+or only for a single sequence:
+```python
+>>> model.predict_single(tokens[0])
+['B-PER', 'I-PER']
+```
+If you are interested in the model's probability distribution for a given sequence, you can:
+```python
+>>> model.predict_proba_single(tokens[0])
+[[{'B-PER': 0.99, 'I-PER': 0.01}, {'B-PER': 0.01, 'I-PER': 0.99}]]
+```
+> Use the `model.predict_proba()` method for a batch of sequences.
+### Weights
+After loading a trained model, you can inspect the learned transition and state weights:
+```python
+>>> model = Model("model.chaine")
+>>> model.transitions
+[{'from': 'B-PER', 'to': 'I-PER', 'weight': 1.430506540616852e-06}]
+>>> model.states
+[{'feature': 'text:John', 'label': 'B-PER', 'weight': 9.536710877105517e-07}, ...]
+```
+You can also dump both transition and state weights as JSON:
+```python
+>>> model.dump_states("states.json")
+>>> model.dump_transitions("transitions.json")
+```
+## Credits
+This project makes use of and is partially based on:
+- [CRFsuite](https://github.com/chokkan/crfsuite)
+- [libLBFGS](https://github.com/chokkan/liblbfgs)
+- [python-crfsuite](https://github.com/scrapinghub/python-crfsuite)
+- [sklearn-crfsuite](https://github.com/TeamHG-Memex/sklearn-crfsuite)

chaine-3.13.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,68 @@
+chaine/logging.py,sha256=ecMug4UjT-tRwjNqFNlrAaElJFUyzncbPzEz1-EKDg8,5256
+chaine/crf.py,sha256=N6s6BhuQtzslHvbItP58kT-TygSkue5nmyGcgO4SEyA,18003
+chaine/__init__.py,sha256=TzYwbQ05GnL1zGl5Y981lpn1SJZNs-9g-H1rDSsQQQI,97
+chaine/typing.py,sha256=F5YEabzjTGcWdT1oVvc7kOoQgrF9YM28IWKt7cSv7TA,393
+chaine/training.py,sha256=l9gRS29jAAbPLvPbxrxMuJ89sPYg5B5cJJc7vl71bgU,8401
+chaine/validation.py,sha256=_kL7gELOpGAtSZk_MCAB0OP2iaMNNXOYVfnAn71u5dM,995
+chaine/_core/crf.cpython-311-darwin.so,sha256=dQJtjAWkM7vcUlqoASmO6H3M1VSuG0CQu8Em-A5Qk5Q,396344
+chaine/_core/crf.cpp,sha256=cJudgXE_R6A59tibSVNBVhV5yKAdpqKcn5C_a6HrG4I,817453
+chaine/_core/trainer_wrapper.hpp,sha256=k0qxNF1Zc1jUWSl8jGsk6lEV87HvrcX368Ld0X2QpCU,573
+chaine/_core/tagger_wrapper.hpp,sha256=Kp3-WuWptf_x1asr33PHFiuCFDKAjBE7Ux_Y-jms5-A,1312
+chaine/_core/crf.pyx,sha256=4fPdpCqtKxE7_yQ6d__FDmRnHpRMebYTVL8xfgH-fi4,9093
+chaine/_core/crfsuite_api.pxd,sha256=mXqxaXbqsu5EwnO10aCwNdaDHldunz_ctPjss7zJJKc,2003
+chaine/_core/trainer_wrapper.cpp,sha256=cWch9SUpmvU6ob70witv9JGZBo3Ffk8FyYZmOSYfoyw,745
+chaine/_core/liblbfgs/README,sha256=DFtDex47VeGMerhoN3r6NRpIyrA6X-dAlg3iod49wWM,2645
+chaine/_core/liblbfgs/COPYING,sha256=BttaPmruwflyFQDw2kp1zLW0I-6sjAbcXHKLoIP5gxE,1113
+chaine/_core/liblbfgs/include/lbfgs.h,sha256=Epq0BMDwOfEkCUaJDAJA9UoumXzh8pchDt_qg5D-caA,32846
+chaine/_core/liblbfgs/lib/arithmetic_sse_float.h,sha256=y4GHzZ_gKv9lQr7z-PWvNt1Ef8qM_Wb0wRoyInvrN6g,13356
+chaine/_core/liblbfgs/lib/arithmetic_ansi.h,sha256=z-kxgT3MLFPutBirvGzmtY27YRKwBuDgOx15n-7wxOc,3427
+chaine/_core/liblbfgs/lib/arithmetic_sse_double.h,sha256=kbRsNxVGimCU0GBu5ntBaqLpcJvHMv519ayirq4Oq6g,13805
+chaine/_core/liblbfgs/lib/lbfgs.c,sha256=NXJaMFR3msc-fvZGshul3urGNhCcMrmVtupF5fZFo34,43705
+chaine/_core/crfsuite/README,sha256=dfFr9_hpGb_-5bRx-OGb8A4HOv1TkvkbbiQC2zJ5LiM,7973
+chaine/_core/crfsuite/COPYING,sha256=yJTvUnFVB_ihql1gLeHlmkfAGB7f6_Cj8BYPa5A66SM,1535
+chaine/_core/crfsuite/swig/crfsuite.cpp,sha256=mlIpS4rUgAqoC92rmnY62H-eOOcvIeyGeelpZlfXfOM,24
+chaine/_core/crfsuite/include/crfsuite_api.hpp,sha256=bT7PenKkHZEO6C4YwH7K-531JoRBc6ZYUb8xna-WZ3w,14098
+chaine/_core/crfsuite/include/os.h,sha256=LwiRwgaake0zBqA-fAp9Ik7t3eblOOyY78spMUmsJ6Q,2238
+chaine/_core/crfsuite/include/crfsuite.h,sha256=vFWIxhig8SblpqI9LtB8UbijMPKcz6fZTpi6N2kCAPU,38631
+chaine/_core/crfsuite/include/crfsuite.hpp,sha256=yHKNeRiewwjM4ssBMqox_3_C3Mgn4Di6rF6zTihA2k0,18593
+chaine/_core/crfsuite/lib/crf/src/crf1d_context.c,sha256=KyKlCgG2XZQ-ve16J78C2qBPKs2KUuhBrGbBWue23f4,22618
+chaine/_core/crfsuite/lib/crf/src/dictionary.c,sha256=GlCNRabrGVZdh-SA6btiblt7ecDefy88QtHbdFBXW1M,3878
+chaine/_core/crfsuite/lib/crf/src/json.c,sha256=x6x-POaCJTHeVHoOGo_-3C_uUHDW2neHVVZnGyGcVMs,29211
+chaine/_core/crfsuite/lib/crf/src/train_arow.c,sha256=p5Qf33ugcmee-z-diCcqSI9bWu4u445RvLxrG9NVejs,10858
+chaine/_core/crfsuite/lib/crf/src/train_averaged_perceptron.c,sha256=sAzgkJuOj9lv_3hSpu_lwIBkGM3dM8XHg04l4z87Xgw,6704
+chaine/_core/crfsuite/lib/crf/src/dataset.c,sha256=Mtq3NmVjtGaCMGW3akiDOEOw8Z-JqYkZqykTRsDUbbA,3254
+chaine/_core/crfsuite/lib/crf/src/crf1d_feature.c,sha256=NzK63VSV9iWLLcIWxYpj2US4kqhBkDm_HpGVeSJef7M,10589
+chaine/_core/crfsuite/lib/crf/src/quark.h,sha256=xvI4sPC6plv4id91ZInIWf3lQXSdBtEIfJmxr_-fiOA,1983
+chaine/_core/crfsuite/lib/crf/src/params.h,sha256=Amz1OcAybVJNCe-A6sqQpOsGk8VebMZhGzJIpUOMv20,3754
+chaine/_core/crfsuite/lib/crf/src/crf1d_tag.c,sha256=KKj_8rS9C8FMKrmi0C5I2q9p8UYvKAG2IMPJl9r3tAU,16748
+chaine/_core/crfsuite/lib/crf/src/vecmath.h,sha256=XaBpZ2sDfjSYS9kS0xSaiTb43Hwi4MN7JANY12Z8imo,9913
+chaine/_core/crfsuite/lib/crf/src/crfsuite_internal.h,sha256=BOORaOdxhK5lQcOwwLXJ0BuwkS3a6XUNT-g0wGRGEsI,7547
+chaine/_core/crfsuite/lib/crf/src/train_l2sgd.c,sha256=RVuMLL-mp8llsZAiKS0yXvOdLkusux4wcGkxQCrl0aw,13947
+chaine/_core/crfsuite/lib/crf/src/holdout.c,sha256=60rLL5VCzH7ULgR_ZhtjmEkj8Zc_YGSayzCRrc5J8hE,2865
+chaine/_core/crfsuite/lib/crf/src/logging.h,sha256=ULplqPJhoMR1TGkKqUBzIBbQplS3YiqRcBYFRwOCCFY,2063
+chaine/_core/crfsuite/lib/crf/src/crfsuite_train.c,sha256=uqlp55WD4ke60j4V52f8nMtHEFzqTvx5Wougffad2kk,8549
+chaine/_core/crfsuite/lib/crf/src/rumavl.h,sha256=70L36qSMLW6VTrYgfGkTqI-aAD8jzMs6qPTd1YlG9Pc,5536
+chaine/_core/crfsuite/lib/crf/src/crf1d_encode.c,sha256=ouVuCPfrY6Q1UzJDRxcxRnIGaw6EEBxGm2i5CfvPpkI,29224
+chaine/_core/crfsuite/lib/crf/src/json.h,sha256=UqWIwZam8GSX58XI5zOzZqHoFI40V79b6CBw416aKFs,3389
+chaine/_core/crfsuite/lib/crf/src/crf1d.h,sha256=WNsBYaw34iNO6ZdRai1gp1WV_YDkU6qV5drLpFbj6kk,10712
+chaine/_core/crfsuite/lib/crf/src/train_passive_aggressive.c,sha256=8e17ZrO3AwiSVuDtKmag_0piGIzNwMRUImv5ogMJUqM,11565
+chaine/_core/crfsuite/lib/crf/src/quark.c,sha256=HIwp_nWyUkEvalvtxDTLTD2ivzyRTEJcs2SQTbr_7J4,4842
+chaine/_core/crfsuite/lib/crf/src/params.c,sha256=eCrpuNmYSKQBE0qZkUaO4e1ZAqBo7hVHP4uUH0c9dlM,10019
+chaine/_core/crfsuite/lib/crf/src/crfsuite.c,sha256=NbIIYrMAmQGoy0-vPmUZRiLJUN0yVRSRqbH2iCbYN6Y,14080
+chaine/_core/crfsuite/lib/crf/src/train_lbfgs.c,sha256=faXl6eX5iVhuXOAvpuwBKxMkRwN_bE-96SQwWJGRtCQ,9447
+chaine/_core/crfsuite/lib/crf/src/crf1d_model.c,sha256=TC-MNEnvphQYSsaRYATun5J9_Fmrn9zolfmn3xYpLHg,26694
+chaine/_core/crfsuite/lib/crf/src/rumavl.c,sha256=oJwD9UETztAWh24F336WQ1RYZu_1rg818MEfpHWk68E,33987
+chaine/_core/crfsuite/lib/crf/src/logging.c,sha256=m4s_RGdtRttzx6UhTVxOtK7Aq3c3XCG0SK4efF6i7J4,2640
+chaine/_core/crfsuite/lib/cqdb/COPYING,sha256=2AbX0dSRC8C9CY6VlwHWItkSHfJnmOdEIZ0HVvoOZ-E,1573
+chaine/_core/crfsuite/lib/cqdb/include/cqdb.h,sha256=qgEklRcZaYbqy52UQ8U3IIjlu8iyhR-PGyDzyyP48Bw,18728
+chaine/_core/crfsuite/lib/cqdb/src/lookup3.c,sha256=vItwg4xz2L-ETuNdjp-ciX2Y4s7BeQq7poltS7iWQkQ,36564
+chaine/_core/crfsuite/lib/cqdb/src/cqdb.c,sha256=3FRltjneWVXB1xcO5SLB0XBD2iH8EjWoWiKCtkt5N1w,17268
+chaine/_core/crfsuite/lib/cqdb/src/main.c,sha256=5XaF59oBpDrdzysph4yQlqD99oRp--ExwWTysKD4STc,4982
+chaine/optimization/metrics.py,sha256=m1csn5S5TrKYpvj6xGMlgIqoWqN9U-YTNwS5oXka4VQ,3716
+chaine/optimization/trial.py,sha256=Hcee5DfOp91oQt7VKHPw80XQpuIWfykmFCBXr1-UONY,3566
+chaine/optimization/__init__.py,sha256=VO4uQB6WjE3YSQyYkjIT_MvyifxoEGcwVMqj86Sbq_o,257
+chaine/optimization/spaces.py,sha256=V9B2YtO4borD6UgC55_vuchxrM0E5gVqiW_Ky5g1WaM,17797
+chaine/optimization/utils.py,sha256=2IdNfH0amKND6XQvwTCH3iMz3GPRVeCrrKkJ4cJklRo,3297
+chaine-3.13.1.dist-info/RECORD,,
+chaine-3.13.1.dist-info/WHEEL,sha256=MXJJ0F8jKJHeo3gqIKHGj-OCEmv0SmzrqL2y5h0WyCk,106
+chaine-3.13.1.dist-info/METADATA,sha256=ZJlOLAhb2T6kJLFhOAS1RD1Wizf0NB-249R9BVPyo18,12465

chaine-3.13.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: poetry-core 1.9.1
+Root-Is-Purelib: false
+Tag: cp311-cp311-macosx_11_0_arm64