PyPI - renard-pipeline - Versions diffs - 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl - Mend

renard-pipeline 0.5.0-py3-none-any.whl → 0.6.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of renard-pipeline might be problematic. Click here for more details.

Files changed (15) hide show

renard/ner_utils.py +24 -18
renard/pipeline/character_unification.py +48 -15
renard/pipeline/core.py +20 -0
renard/pipeline/corefs/corefs.py +2 -2
renard/pipeline/ner/__init__.py +1 -0
renard/pipeline/{ner.py → ner/ner.py} +45 -75
renard/pipeline/ner/retrieval.py +375 -0
renard/pipeline/tokenization.py +1 -0
renard/plot_utils.py +10 -3
renard/resources/determiners/__init__.py +1 -0
renard/resources/determiners/determiners.py +41 -0
{renard_pipeline-0.5.0.dist-info → renard_pipeline-0.6.1.dist-info}/METADATA +21 -20
{renard_pipeline-0.5.0.dist-info → renard_pipeline-0.6.1.dist-info}/RECORD +15 -11
{renard_pipeline-0.5.0.dist-info → renard_pipeline-0.6.1.dist-info}/LICENSE +0 -0
{renard_pipeline-0.5.0.dist-info → renard_pipeline-0.6.1.dist-info}/WHEEL +0 -0

renard/ner_utils.py CHANGED Viewed

@@ -74,7 +74,7 @@ class DataCollatorForTokenClassificationWithBatchEncoding:
 class NERDataset(Dataset):
     """
     :ivar _context_mask: for each element, a mask indicating which
-        tokens are part of the context (1 for context, 0 for text on
+        tokens are part of the context (0 for context, 1 for text on
         which to perform inference).  The mask allows to discard
         predictions made for context at inference time, even though
         the context can still be passed as input to the model.
@@ -92,11 +92,11 @@ class NERDataset(Dataset):
             assert all(
                 [len(cm) == len(elt) for elt, cm in zip(self.elements, context_mask)]
             )
-        self._context_mask = context_mask or [[0] * len(elt) for elt in self.elements]
+        self._context_mask = context_mask or [[1] * len(elt) for elt in self.elements]
         self.tokenizer = tokenizer
-    def __getitem__(self, index: Union[int, List[int]]) -> BatchEncoding:
+    def __getitem__(self, index: int) -> BatchEncoding:
         element = self.elements[index]
         batch = self.tokenizer(
@@ -104,19 +104,18 @@ class NERDataset(Dataset):
             truncation=True,
             max_length=512,  # TODO
             is_split_into_words=True,
+            return_length=True,
         )
-        batch["context_mask"] = [0] * len(batch["input_ids"])
-        elt_context_mask = self._context_mask[index]
-        for i in range(len(element)):
-            w2t = batch.word_to_tokens(0, i)
-            # w2t can be None in case of truncation, which can happen
-            # if `element' is too long
-            if w2t is None:
-                continue
-            mask_value = elt_context_mask[i]
-            tokens_mask = [mask_value] * (w2t.end - w2t.start)
-            batch["context_mask"][w2t.start : w2t.end] = tokens_mask
+        length = batch["length"][0]
+        del batch["length"]
+        if self.tokenizer.truncation_side == "right":
+            batch["context_mask"] = self._context_mask[index][:length]
+        else:
+            assert self.tokenizer.truncation_side == "left"
+            batch["context_mask"] = self._context_mask[index][
+                len(batch["input_ids"]) - length :
+            ]
         return batch
@@ -185,6 +184,7 @@ def load_conll2002_bio(
     path: str,
     tag_conversion_map: Optional[Dict[str, str]] = None,
     separator: str = "\t",
+    max_sent_len: Optional[int] = None,
     **kwargs,
 ) -> Tuple[List[List[str]], List[str], List[NEREntity]]:
     """Load a file under CoNLL2022 BIO format.  Sentences are expected
@@ -196,7 +196,9 @@ def load_conll2002_bio(
     :param separator: separator between token and BIO tags
     :param tag_conversion_map: conversion map for tags found in the
         input file.  Example : ``{'B': 'B-PER', 'I': 'I-PER'}``
-    :param kwargs: additional kwargs for ``open`` (such as
+    :param max_sent_len: if specified, maximum length, in tokens, of
+        sentences.
+    :param kwargs: additional kwargs for :func:`open` (such as
         ``encoding`` or ``newline``).
     :return: ``(sentences, tokens, entities)``
@@ -211,7 +213,9 @@ def load_conll2002_bio(
     tags = []
     for line in raw_data.split("\n"):
         line = line.strip("\n")
-        if re.fullmatch(r"\s*", line):
+        if re.fullmatch(r"\s*", line) or (
+            not max_sent_len is None and len(sent_tokens) >= max_sent_len
+        ):
             if len(sent_tokens) == 0:
                 continue
             sents.append(sent_tokens)
@@ -231,6 +235,7 @@ def hgdataset_from_conll2002(
     path: str,
     tag_conversion_map: Optional[Dict[str, str]] = None,
     separator: str = "\t",
+    max_sent_len: Optional[int] = None,
     **kwargs,
 ) -> HGDataset:
     """Load a CoNLL-2002 file as a Huggingface Dataset.
@@ -238,12 +243,13 @@ def hgdataset_from_conll2002(
     :param path: passed to :func:`.load_conll2002_bio`
     :param tag_conversion_map: passed to :func:`load_conll2002_bio`
     :param separator: passed to :func:`load_conll2002_bio`
-    :param kwargs: passed to :func:`load_conll2002_bio`
+    :param max_sent_len: passed to :func:`load_conll2002_bio`
+    :param kwargs: additional kwargs for :func:`open`
     :return: a :class:`datasets.Dataset` with features 'tokens' and 'labels'.
     """
     sentences, tokens, entities = load_conll2002_bio(
-        path, tag_conversion_map, separator, **kwargs
+        path, tag_conversion_map, separator, max_sent_len, **kwargs
     )
     # convert entities to labels

renard/pipeline/character_unification.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from typing import Any, Dict, List, FrozenSet, Set, Optional, Tuple, Union, Literal
-import copy
+import re, sys
 from itertools import combinations
 from collections import defaultdict, Counter
 from dataclasses import dataclass
@@ -11,6 +11,7 @@ from renard.pipeline.ner import NEREntity
 from renard.pipeline.progress import ProgressReporter
 from renard.resources.hypocorisms import HypocorismGazetteer
 from renard.resources.pronouns import is_a_female_pronoun, is_a_male_pronoun
+from renard.resources.determiners import singular_determiners
 from renard.resources.titles import is_a_male_title, is_a_female_title, all_titles
@@ -167,6 +168,7 @@ class GraphRulesCharacterUnifier(PipelineStep):
         additional_hypocorisms: Optional[List[Tuple[str, List[str]]]] = None,
         link_corefs_mentions: bool = False,
         ignore_lone_titles: Optional[Set[str]] = None,
+        ignore_leading_determiner: bool = False,
     ) -> None:
         """
         :param min_appearances: minimum number of appearances of a
@@ -181,15 +183,20 @@ class GraphRulesCharacterUnifier(PipelineStep):
             extract a lot of spurious links.  However, linking by
             coref is sometimes the only way to resolve a character
             alias.
-        :param ignore_lone_titles: a set of titles to ignore when
-            they stand on their own.  This avoids extracting false
+        :param ignore_lone_titles: a set of titles to ignore when they
+            stand on their own.  This avoids extracting false
             positives characters such as 'Mr.' or 'Miss'.
+        :param ignore_leading_determiner: if ``True``, will ignore the
+            leading determiner when applying unification rules.  This
+            is useful if the NER model used in the pipeline adds
+            leading determiners as part of entites.
         """
         self.min_appearances = min_appearances
         self.additional_hypocorisms = additional_hypocorisms
         self.link_corefs_mentions = link_corefs_mentions
         self.ignore_lone_titles = ignore_lone_titles or set()
         self.character_ner_tag = "PER"  # a default value, will be set by _pipeline_init
+        self.ignore_leading_determiner = ignore_leading_determiner
         super().__init__()
@@ -229,23 +236,28 @@ class GraphRulesCharacterUnifier(PipelineStep):
         # * link nodes based on several rules
         for name1, name2 in combinations(G.nodes(), 2):
+            # preprocess name when needed
+            pname1 = self._preprocess_name(name1)
+            pname2 = self._preprocess_name(name2)
             # is one name a known hypocorism of the other ? (also
             # checks if both names are the same)
-            if self.hypocorism_gazetteer.are_related(name1, name2):
+            if self.hypocorism_gazetteer.are_related(pname1, pname2):
                 G.add_edge(name1, name2)
                 continue
             # if we remove the title, is one name related to the other
             # ?
             if self.names_are_related_after_title_removal(
-                name1, name2, hname_constants
+                pname1, pname2, hname_constants
             ):
                 G.add_edge(name1, name2)
                 continue
             # add an edge if two characters have the same family names
-            human_name1 = HumanName(name1, constants=hname_constants)
-            human_name2 = HumanName(name2, constants=hname_constants)
+            human_name1 = HumanName(pname1, constants=hname_constants)
+            human_name2 = HumanName(pname2, constants=hname_constants)
             if (
                 len(human_name1.last) > 0
                 and human_name1.last.lower() == human_name2.last.lower()
@@ -282,10 +294,15 @@ class GraphRulesCharacterUnifier(PipelineStep):
                 pass
         for name1, name2 in combinations(G.nodes(), 2):
+            # preprocess names when needed
+            pname1 = self._preprocess_name(name1)
+            pname2 = self._preprocess_name(name2)
             # check if characters have the same last name but a
             # different first name.
-            human_name1 = HumanName(name1, constants=hname_constants)
-            human_name2 = HumanName(name2, constants=hname_constants)
+            human_name1 = HumanName(pname1, constants=hname_constants)
+            human_name2 = HumanName(pname2, constants=hname_constants)
             if (
                 len(human_name1.last) > 0
                 and len(human_name2.last) > 0
@@ -337,6 +354,17 @@ class GraphRulesCharacterUnifier(PipelineStep):
         return {"characters": characters}
+    def _preprocess_name(self, name) -> str:
+        if self.ignore_leading_determiner:
+            if not self.lang in singular_determiners:
+                print(
+                    f"[warning] can't ignore leading determiners for {self.lang}",
+                    file=sys.stderr,
+                )
+            for determiner in singular_determiners.get(self.lang, []):
+                name = re.sub(f"^{determiner} ", " ", name, flags=re.I)
+        return name
     def _make_hname_constants(self) -> Constants:
         if self.lang == "eng":
             return Constants()
@@ -365,13 +393,18 @@ class GraphRulesCharacterUnifier(PipelineStep):
             or self.hypocorism_gazetteer.are_related(raw_name1, raw_name2)
         )
-    def names_are_in_coref(self, name1: str, name2: str, corefs: List[List[Mention]]):
+    def names_are_in_coref(
+        self, name1: str, name2: str, corefs: List[List[Mention]]
+    ) -> bool:
+        once_together = False
         for coref_chain in corefs:
-            if any([name1 == " ".join(m.tokens) for m in coref_chain]) and any(
-                [name2 == " ".join(m.tokens) for m in coref_chain]
-            ):
-                return True
-        return False
+            name1_in = any([name1 == " ".join(m.tokens) for m in coref_chain])
+            name2_in = any([name2 == " ".join(m.tokens) for m in coref_chain])
+            if name1_in == (not name2_in):
+                return False
+            elif name1_in and name2_in:
+                once_together = True
+        return once_together
     def infer_name_gender(
         self,

renard/pipeline/core.py CHANGED Viewed

@@ -289,6 +289,7 @@ class PipelineState:
         node_kwargs: Optional[List[Dict[str, Any]]] = None,
         edge_kwargs: Optional[List[Dict[str, Any]]] = None,
         label_kwargs: Optional[List[Dict[str, Any]]] = None,
+        legend: bool = False,
     ):
         """Plot ``self.character_graph`` using reasonable default
         parameters, and save the produced figures in the specified
@@ -306,6 +307,7 @@ class PipelineState:
         :param node_kwargs: passed to :func:`nx.draw_networkx_nodes`
         :param edge_kwargs: passed to :func:`nx.draw_networkx_nodes`
         :param label_kwargs: passed to :func:`nx.draw_networkx_labels`
+        :param legend: passed to :func:`.plot_nx_graph_reasonably`
         """
         import matplotlib.pyplot as plt
@@ -346,6 +348,7 @@ class PipelineState:
                 node_kwargs=node_kwargs[i],
                 edge_kwargs=edge_kwargs[i],
                 label_kwargs=label_kwargs[i],
+                legend=legend,
             )
             plt.savefig(f"{directory}/{i}.png")
             plt.close()
@@ -361,6 +364,8 @@ class PipelineState:
         node_kwargs: Optional[Dict[str, Any]] = None,
         edge_kwargs: Optional[Dict[str, Any]] = None,
         label_kwargs: Optional[Dict[str, Any]] = None,
+        tight_layout: bool = False,
+        legend: bool = False,
     ):
         """Plot ``self.character_graph`` using reasonable parameters,
         and save the produced figure to a file
@@ -373,6 +378,8 @@ class PipelineState:
         :param node_kwargs: passed to :func:`nx.draw_networkx_nodes`
         :param edge_kwargs: passed to :func:`nx.draw_networkx_nodes`
         :param label_kwargs: passed to :func:`nx.draw_networkx_labels`
+        :param tight_layout: if ``True``, will use matplotlib's tight_layout
+        :param legend: passed to :func:`.plot_nx_graph_reasonably`
         """
         import matplotlib.pyplot as plt
@@ -397,7 +404,10 @@ class PipelineState:
             node_kwargs=node_kwargs,
             edge_kwargs=edge_kwargs,
             label_kwargs=label_kwargs,
+            legend=legend,
         )
+        if tight_layout:
+            fig.tight_layout()
         plt.savefig(path)
         plt.close()
@@ -414,6 +424,8 @@ class PipelineState:
         node_kwargs: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
         edge_kwargs: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
         label_kwargs: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
+        tight_layout: bool = False,
+        legend: bool = False,
     ):
         """Plot ``self.character_network`` using reasonable default
         parameters
@@ -442,6 +454,8 @@ class PipelineState:
         :param node_kwargs: passed to :func:`nx.draw_networkx_nodes`
         :param edge_kwargs: passed to :func:`nx.draw_networkx_nodes`
         :param label_kwargs: passed to :func:`nx.draw_networkx_labels`
+        :param tight_layout: if ``True``, will use matplotlib's tight_layout
+        :param legend: passed to :func:`.plot_nx_graph_reasonably`
         """
         import matplotlib.pyplot as plt
         from matplotlib.widgets import Slider
@@ -463,6 +477,8 @@ class PipelineState:
             assert not isinstance(node_kwargs, list)
             assert not isinstance(edge_kwargs, list)
             assert not isinstance(label_kwargs, list)
+            if tight_layout:
+                fig.tight_layout()
             plot_nx_graph_reasonably(
                 G,
                 ax=ax,
@@ -470,6 +486,7 @@ class PipelineState:
                 node_kwargs=node_kwargs,
                 edge_kwargs=edge_kwargs,
                 label_kwargs=label_kwargs,
+                legend=legend,
             )
             return
@@ -520,11 +537,14 @@ class PipelineState:
                 node_kwargs=node_kwargs[slider_i],
                 edge_kwargs=edge_kwargs[slider_i],
                 label_kwargs=label_kwargs[slider_i],
+                legend=legend,
             )
             ax.set_xlim(-1.2, 1.2)
             ax.set_ylim(-1.2, 1.2)
         slider_ax = fig.add_axes([0.1, 0.05, 0.8, 0.04])
+        if tight_layout:
+            fig.tight_layout()
         # HACK: we save the slider to the figure. This ensure the
         # slider is still alive at plotting time.
         fig.slider = Slider(  # type: ignore

renard/pipeline/corefs/corefs.py CHANGED Viewed

@@ -20,7 +20,7 @@ class BertCoreferenceResolver(PipelineStep):
     def __init__(
         self,
         model: Optional[Union[BertForCoreferenceResolution]] = None,
-        hugginface_model_id: Optional[str] = None,
+        huggingface_model_id: Optional[str] = None,
         batch_size: int = 1,
         device: Literal["auto", "cuda", "cpu"] = "auto",
         tokenizer: Optional[PreTrainedTokenizerFast] = None,
@@ -47,7 +47,7 @@ class BertCoreferenceResolver(PipelineStep):
             inference on the whole document.
         """
         if isinstance(model, str):
-            self.hugginface_model_id = hugginface_model_id
+            self.hugginface_model_id = huggingface_model_id
             self.model = None  # model will be init by _pipeline_init_
         else:
             self.hugginface_model_id = None

renard/pipeline/ner/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from renard.pipeline.ner.ner import *

renard/pipeline/{ner.py → ner/ner.py} RENAMED Viewed

@@ -1,22 +1,32 @@
 from __future__ import annotations
-import random, itertools
-from typing import TYPE_CHECKING, List, Dict, Any, Set, Tuple, Optional, Union, Literal
+from typing import (
+    TYPE_CHECKING,
+    List,
+    Dict,
+    Any,
+    Set,
+    Tuple,
+    Optional,
+    Union,
+    Literal,
+)
 from dataclasses import dataclass
 import torch
-from seqeval.metrics import precision_score, recall_score, f1_score
 from renard.nltk_utils import nltk_fix_bio_tags
 from renard.ner_utils import (
     DataCollatorForTokenClassificationWithBatchEncoding,
     NERDataset,
 )
 from renard.pipeline.core import PipelineStep, Mention
-from renard.pipeline.progress import ProgressReporter
 from renard.ner_utils import ner_entities
 if TYPE_CHECKING:
     from transformers.tokenization_utils_base import BatchEncoding
-    from transformers import PreTrainedModel, PreTrainedTokenizerFast
-    from renard.pipeline.core import Pipeline
+    from transformers import (
+        PreTrainedModel,
+        PreTrainedTokenizerFast,
+    )
+    from renard.pipeline.ner.retrieval import NERContextRetriever
 @dataclass
@@ -28,7 +38,7 @@ class NEREntity(Mention):
         """
         .. note::
-            This method is implemtented here to avoid type issues.  Since
+            This method is implemented here to avoid type issues.  Since
             :meth:`.Mention.shifted` cannot be annotated as returning
             ``Self``, this method annotate the correct return type when
             using :meth:`.NEREntity.shifted`.
@@ -42,18 +52,21 @@ class NEREntity(Mention):
 def score_ner(
     pred_bio_tags: List[str], ref_bio_tags: List[str]
 ) -> Tuple[float, float, float]:
-    """Score NER as in CoNLL-2003 shared task using ``seqeval``
+    """Score NER as in CoNLL-2003 shared task using the ``seqeval``
+    library, if installed.
     Precision is the percentage of named entities in ``ref_bio_tags``
-    that are correct. Recall is the percentage of named entities in
-    pred_bio_tags that are in ref_bio_tags. F1 is the harmonic mean of
-    both.
+    that are correct.  Recall is the percentage of named entities in
+    pred_bio_tags that are in ref_bio_tags.  F1 is the harmonic mean
+    of both.
     :param pred_bio_tags:
     :param ref_bio_tags:
-    :return: ``(precision, recall, F1 score)``
+    :return: ``(precision, recall, F1 score)``
     """
+    from seqeval.metrics import precision_score, recall_score, f1_score
     assert len(pred_bio_tags) == len(ref_bio_tags)
     return (
         precision_score([ref_bio_tags], [pred_bio_tags]),
@@ -71,12 +84,19 @@ class NLTKNamedEntityRecognizer(PipelineStep):
         """
         import nltk
-        nltk.download("averaged_perceptron_tagger", quiet=True)
+        nltk.download(f"averaged_perceptron_tagger", quiet=True)
         nltk.download("maxent_ne_chunker", quiet=True)
+        nltk.download("maxent_ne_chunker_tab", quiet=True)
         nltk.download("words", quiet=True)
         super().__init__()
+    def _pipeline_init_(self, lang: str, **kwargs):
+        import nltk
+        nltk.download(f"averaged_perceptron_tagger_{lang}", quiet=True)
+        super()._pipeline_init_(lang, **kwargs)
     def __call__(self, tokens: List[str], **kwargs) -> Dict[str, Any]:
         """
         :param text:
@@ -102,64 +122,6 @@ class NLTKNamedEntityRecognizer(PipelineStep):
         return {"entities"}
-class NERContextRetriever:
-    def __call__(self, dataset: NERDataset) -> NERDataset:
-        raise NotImplementedError
-class NERSamenounContextRetriever(NERContextRetriever):
-    """
-    Retrieve relevant context using the samenoun strategy as in
-    Amalvy et al.  2023.
-    """
-    def __init__(self, k: int) -> None:
-        """
-        :param k: the number of sentences to retrieve
-        """
-        self.k = k
-    def __call__(self, dataset: NERDataset) -> NERDataset:
-        import nltk
-        # NOTE: POS tagging is not incorporated in the pipeline yet,
-        # so we manually compute it here.
-        elements_names = [
-            {t[0] for t in nltk.pos_tag(element) if t[1].startswith("NN")}
-            for element in dataset.elements
-        ]
-        elements_with_context = []
-        for elt_i, elt in enumerate(dataset.elements):
-            retrieved_elts = [
-                other_elt
-                for other_elt_i, other_elt in enumerate(dataset.elements)
-                if not other_elt_i == elt_i
-                and len(elements_names[elt_i].intersection(elements_names[other_elt_i]))
-                > 0
-            ]
-            retrieved_elts = random.sample(
-                retrieved_elts, k=min(self.k, len(retrieved_elts))
-            )
-            elements_with_context.append(
-                (
-                    elt,
-                    [dataset.tokenizer.sep_token]
-                    + list(itertools.chain.from_iterable(retrieved_elts)),
-                )
-            )
-        return NERDataset(
-            [element + context for element, context in elements_with_context],
-            dataset.tokenizer,
-            [
-                [0] * len(element) + [1] * len(context)
-                for element, context in elements_with_context
-            ],
-        )
 class BertNamedEntityRecognizer(PipelineStep):
     """An entity recognizer based on BERT"""
@@ -307,7 +269,7 @@ class BertNamedEntityRecognizer(PipelineStep):
         batch_i: int,
         wp_labels: List[str],
         tokens: List[str],
-        context_mask: torch.Tensor,
+        ctxmask: torch.Tensor,
     ) -> List[str]:
         """Align labels to tokens rather than wordpiece tokens.
@@ -318,13 +280,21 @@ class BertNamedEntityRecognizer(PipelineStep):
         """
         batch_labels = ["O"] * len(tokens)
+        try:
+            inference_start = ctxmask[batch_i].tolist().index(1)
+        except ValueError:
+            inference_start = 0
         for wplabel_j, wp_label in enumerate(wp_labels):
-            if context_mask[batch_i][wplabel_j] == 1:
-                continue
             token_i = batchs.token_to_word(batch_i, wplabel_j)
             if token_i is None:
                 continue
-            batch_labels[token_i] = wp_label
+            if ctxmask[batch_i][token_i] == 0:
+                continue
+            batch_labels[token_i - inference_start] = wp_label
         return batch_labels

renard/pipeline/ner/retrieval.py ADDED Viewed

@@ -0,0 +1,375 @@
+from collections.abc import Set
+import sys
+from typing import Union, List, cast, Literal, Optional
+import random
+from dataclasses import dataclass
+from more_itertools import flatten
+from renard.ner_utils import NERDataset
+import nltk
+from rank_bm25 import BM25Okapi
+from transformers import (
+    BertForSequenceClassification,
+    BertTokenizerFast,
+    DataCollatorWithPadding,
+)
+from transformers.tokenization_utils_base import BatchEncoding
+import torch
+from torch.utils.data import Dataset, DataLoader
@dataclass
class NERContextRetrievalMatch:
    """An element selected as context by a context retriever."""

    # tokens of the retrieved element
    element: List[str]
    # index of the retrieved element
    element_i: int
    # side of the retrieved element
    side: Literal["left", "right"]
    # retrieval score, or None
    score: Optional[float]

    def __hash__(self) -> int:
        # hash over the tokens followed by the remaining fields
        key = (*self.element, self.element_i, self.side, self.score)
        return hash(key)
class NERContextRetriever:
    """Base class for context retrievers.

    Calling a retriever on a :class:`NERDataset` returns a new
    :class:`NERDataset` where each element is surrounded by the
    context elements selected by :meth:`retrieve`.  Subclasses must
    implement :meth:`retrieve`, and may override
    :meth:`compute_global_features`.
    """

    def __init__(self, k: int) -> None:
        """
        :param k: the max number of elements to retrieve for each
            element
        """
        self.k = k

    def compute_global_features(self, elements: List[List[str]]) -> dict:
        """Compute features once for the whole list of elements.

        The returned dict is unpacked as keyword arguments of
        :meth:`retrieve`.  The base implementation computes nothing.

        :param elements: all the elements of the dataset
        """
        return {}

    def retrieve(
        self, element_i: int, elements: List[List[str]], **kwargs
    ) -> List[NERContextRetrievalMatch]:
        """Select context for ``elements[element_i]``.

        :param element_i: index of the element to retrieve context for
        :param elements: all the elements of the dataset
        :param kwargs: the output of :meth:`compute_global_features`
        :return: at most ``self.k`` matches
        """
        raise NotImplementedError

    @staticmethod
    def _side_tokens(matchs: list, side: str) -> List[str]:
        """Concatenate the tokens of matches from ``side``, ordered by
        element index."""
        ordered = sorted(
            (m for m in matchs if m.side == side), key=lambda m: m.element_i
        )
        return [token for m in ordered for token in m.element]

    @staticmethod
    def _make_context_mask(
        lctx: List[str], element: List[str], rctx: List[str]
    ) -> List[int]:
        """Build the mask of ``lctx + element + rctx``: 0 for context
        tokens, 1 for the tokens on which to perform inference."""
        return [0] * len(lctx) + [1] * len(element) + [0] * len(rctx)

    def __call__(self, dataset: NERDataset) -> NERDataset:
        """Add retrieved context to each element of ``dataset``.

        :param dataset: the dataset to enrich
        :return: a new :class:`NERDataset` using the tokenizer of
            ``dataset``, where each element is ``left context +
            element + right context``, with the matching context mask.
        """
        global_features = self.compute_global_features(dataset.elements)

        # [(left_ctx, element, right_ctx), ...]
        elements_with_context = []
        for elt_i, elt in enumerate(dataset.elements):
            matchs = self.retrieve(elt_i, dataset.elements, **global_features)
            assert len(matchs) <= self.k
            lctx = self._side_tokens(matchs, "left")
            rctx = self._side_tokens(matchs, "right")
            elements_with_context.append((lctx, elt, rctx))

        return NERDataset(
            [lctx + element + rctx for lctx, element, rctx in elements_with_context],
            dataset.tokenizer,
            # 0 for context, 1 for the text on which to perform
            # inference
            [
                self._make_context_mask(lctx, element, rctx)
                for lctx, element, rctx in elements_with_context
            ],
        )
class NERSamenounContextRetriever(NERContextRetriever):
    """
    Retrieve relevant context using the samenoun strategy as in
    Amalvy et al.  2023.

    An element is a candidate context for another one when both
    contain at least one common token tagged ``NN`` by
    :func:`nltk.pos_tag`.  At most ``k`` candidates are then picked at
    random.
    """

    def __init__(self, k: int) -> None:
        """
        :param k: the max number of sentences to retrieve
        """
        super().__init__(k)

    def compute_global_features(self, elements: List[List[str]]) -> dict:
        """Compute, for each element, the set of its tokens tagged
        ``NN``.

        :return: a dict with a single key ``"NNs"``
        """
        nouns_per_element = []
        for element in elements:
            tagged = nltk.pos_tag(element)
            nouns_per_element.append({token for token, tag in tagged if tag == "NN"})
        return {"NNs": nouns_per_element}

    def retrieve(
        self, element_i: int, elements: List[List[str]], NNs: List[Set[str]], **kwargs
    ) -> List[NERContextRetrievalMatch]:
        """Pick at random at most ``self.k`` elements sharing a noun
        with ``elements[element_i]``.

        :param NNs: for each element, the set of its tokens tagged
            ``NN``, as computed by :meth:`compute_global_features`
        """
        query_nouns = NNs[element_i]

        candidates = []
        for other_i, other in enumerate(elements):
            if other_i == element_i:
                continue
            if not query_nouns.intersection(NNs[other_i]):  # type: ignore
                continue
            side = "left" if other_i < element_i else "right"
            candidates.append(NERContextRetrievalMatch(other, other_i, side, None))

        sample_size = min(self.k, len(candidates))
        return random.sample(candidates, k=sample_size)
class NERNeighborsContextRetriever(NERContextRetriever):
    """A context retriever that chooses nearby elements.

    For each element, up to ``k // 2`` elements are taken directly
    before it, and up to ``k // 2`` directly after it.
    """

    def __init__(self, k: int):
        """
        :param k: the max number of elements to retrieve.  Must be
            even, since the same number of elements is taken on each
            side.
        """
        assert k % 2 == 0, "k must be even"
        super().__init__(k)

    def retrieve(
        self, element_i: int, elements: List[List[str]], **kwargs
    ) -> List[NERContextRetrievalMatch]:
        """Retrieve the direct neighbors of ``elements[element_i]``.

        :param element_i: index of the element to retrieve context for
        :param elements: all the elements of the dataset
        :return: left matches followed by right matches, each in
            document order.  There are fewer than ``k`` matches near
            the start or the end of ``elements``.
        """
        side_nb = self.k // 2

        # Clamp the start of the left window: a negative slice start
        # would be interpreted as counting from the end of the list,
        # which loses the left context of the first elements.
        left_start = max(0, element_i - side_nb)
        lctx = [
            NERContextRetrievalMatch(elt, left_start + offset, "left", None)
            for offset, elt in enumerate(elements[left_start:element_i])
        ]

        right_start = element_i + 1
        rctx = [
            NERContextRetrievalMatch(elt, right_start + offset, "right", None)
            for offset, elt in enumerate(
                elements[right_start : right_start + side_nb]
            )
        ]

        return lctx + rctx
class NERBM25ContextRetriever(NERContextRetriever):
    """A context retriever that selects elements according to the BM25 ranking formula."""

    def __init__(self, k: int) -> None:
        """
        :param k: the max number of elements to retrieve
        """
        super().__init__(k)

    def compute_global_features(self, elements: List[List[str]]) -> dict:
        """Fit a BM25 model on all elements.

        :return: a dict with a single key ``"bm25_model"``
        """
        return {"bm25_model": BM25Okapi(elements)}

    def retrieve(
        self, element_i: int, elements: List[List[str]], bm25_model: BM25Okapi, **kwargs
    ) -> List[NERContextRetrievalMatch]:
        """Retrieve the elements with the highest BM25 score, using
        ``elements[element_i]`` as the query.

        :param element_i: index of the element to retrieve context for
        :param elements: all the elements of the dataset
        :param bm25_model: the model computed by
            :meth:`compute_global_features`
        :return: at most ``self.k`` matches, never including
            ``elements[element_i]`` itself
        """
        query = elements[element_i]
        sent_scores = bm25_model.get_scores(query)
        sent_scores[element_i] = float("-Inf")  # don't retrieve self

        # There are only len(sent_scores) - 1 candidates besides the
        # query itself: asking for more would return the query, with
        # its -Inf score, as its own context.
        k = min(self.k, len(sent_scores) - 1)
        if k <= 0:
            return []

        topk_values, topk_indexs = torch.topk(torch.tensor(sent_scores), k=k, dim=0)
        return [
            NERContextRetrievalMatch(
                elements[index], index, "left" if index < element_i else "right", value
            )
            for value, index in zip(topk_values.tolist(), topk_indexs.tolist())
        ]
@dataclass(frozen=True)
class NERNeuralContextRetrievalExample:
    """An immutable (element, context) pair for context retrieval."""

    #: tokens of the text on which NER is performed
    element: List[str]

    #: tokens of the context assisting the prediction
    context: List[str]

    #: side of ``element`` the context comes from
    context_side: Literal["left", "right"]
+class NERNeuralContextRetrievalDataset(Dataset):
+    """"""
+    def __init__(
+        self,
+        examples: List[NERNeuralContextRetrievalExample],
+        tokenizer: BertTokenizerFast,
+    ) -> None:
+        self.examples = examples
+        self.tokenizer: BertTokenizerFast = tokenizer
+    def __len__(self) -> int:
+        return len(self.examples)
+    def __getitem__(self, index: int) -> BatchEncoding:
+        """Get a BatchEncoding representing example at index.
+        :param index: index of the example to retrieve
+        :return: a ``BatchEncoding``, with key ``'label'`` set.
+        """
+        example = self.examples[index]
+        tokens = example.context + ["[SEP]"] + example.element
+        batch: BatchEncoding = self.tokenizer(
+            tokens,
+            is_split_into_words=True,
+            truncation=True,
+            max_length=512,
+        )
+        return batch
+class NERNeuralContextRetriever(NERContextRetriever):
+    """
+    A neural context retriever as in Amalvy et al.  2024
+    """
+    def __init__(
+        self,
+        heuristic_context_selector: NERContextRetriever,
+        pretrained_model: Union[
+            str, BertForSequenceClassification
+        ] = "compnet-renard/bert-base-cased-NER-reranker",
+        k: int = 3,
+        batch_size: int = 1,
+        threshold: float = 0.0,
+        device_str: Literal["cuda", "cpu", "auto"] = "auto",
+    ) -> None:
+        """
+        :param pretrained_model: pretrained model name, used to
+            load a :class:`transformers.BertForSequenceClassification`
+        :param heuristic_context_selector: name of the context
+            selector to use as retrieval heuristic, from
+            ``context_selector_name_to_class``
+        :param heuristic_context_selector_kwargs: kwargs to pass the
+            heuristic context retriever at instantiation time
+        :param k: max number of sents to retrieve
+        :param batch_size: batch size used at inference
+        :param threshold:
+        :param device_str:
+        """
+        from transformers import BertForSequenceClassification, BertTokenizerFast
+        if isinstance(pretrained_model, str):
+            self.ctx_classifier = BertForSequenceClassification.from_pretrained(
+                pretrained_model
+            )  # type: ignore
+        else:
+            self.ctx_classifier = pretrained_model
+        self.ctx_classifier = cast(BertForSequenceClassification, self.ctx_classifier)
+        self.tokenizer = BertTokenizerFast.from_pretrained(
+            pretrained_model if isinstance(pretrained_model, str) else "bert-base-cased"
+        )
+        self.heuristic_context_selector = heuristic_context_selector
+        self.batch_size = batch_size
+        self.threshold = threshold
+        if device_str == "auto":
+            device_str = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = torch.device(device_str)
+        super().__init__(k)
+    def set_heuristic_k_(self, k: int):
+        self.heuristic_context_selector.k = k
+    def predict(self, examples: List[NERNeuralContextRetrievalExample]) -> torch.Tensor:
+        """
+        :param dataset: A list of :class:`ContextSelectionExample`
+        :return: A tensor of shape ``(len(dataset), 2)`` of class
+                 scores
+        """
+        dataset = NERNeuralContextRetrievalDataset(examples, self.tokenizer)
+        self.ctx_classifier = self.ctx_classifier.to(self.device)
+        data_collator = DataCollatorWithPadding(dataset.tokenizer)  # type: ignore
+        dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=False, collate_fn=data_collator)  # type: ignore
+        # inference using self.ctx_classifier
+        self.ctx_classifier = self.ctx_classifier.eval()
+        with torch.no_grad():
+            scores = torch.zeros((0,)).to(self.device)
+            for X in dataloader:
+                X = X.to(self.device)
+                # out.logits is of shape (batch_size, 2)
+                out = self.ctx_classifier(
+                    X["input_ids"],
+                    token_type_ids=X["token_type_ids"],
+                    attention_mask=X["attention_mask"],
+                )
+                # (batch_size, 2)
+                pred = torch.softmax(out.logits, dim=1)
+                scores = torch.cat([scores, pred], dim=0)
+        return scores
+    def compute_global_features(self, elements: List[List[str]]) -> dict:
+        features = self.heuristic_context_selector.compute_global_features(elements)
+        return {
+            "heuristic_matchs": [
+                self.heuristic_context_selector.retrieve(i, elements, **features)
+                for i in range(len(elements))
+            ]
+        }
+    def retrieve(
+        self,
+        element_i: int,
+        elements: List[List[str]],
+        heuristic_matchs: List[List[NERContextRetrievalMatch]],
+        **kwargs,
+    ) -> List[NERContextRetrievalMatch]:
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.ctx_classifier = self.ctx_classifier.to(device)  # type: ignore
+        # no context retrieved by heuristic : nothing to do
+        if len(heuristic_matchs) == 0:
+            return []
+        element = elements[element_i]
+        matchs = heuristic_matchs[element_i]
+        # prepare datas for inference
+        ctx_dataset = [
+            NERNeuralContextRetrievalExample(element, m.element, m.side) for m in matchs
+        ]
+        # (len(dataset), 2)
+        scores = self.predict(ctx_dataset)
+        for i, m in enumerate(matchs):
+            m.score = float(scores[i, 1].item())
+        assert all([not m.score is None for m in matchs])
+        return [
+            m
+            for m in sorted(matchs, key=lambda m: -m.score)[: self.k]  # type: ignore
+            if m.score > self.threshold  # type: ignore
+        ]
+class NEREnsembleContextRetriever(NERContextRetriever):
+    """Combine several context retriever"""
+    def __init__(self, retrievers: List[NERContextRetriever], k: int) -> None:
+        self.retrievers = retrievers
+        super().__init__(k)
+    def compute_global_features(self, elements: List[List[str]]) -> dict:
+        features = {}
+        for retriever in self.retrievers:
+            for k, v in retriever.compute_global_features(elements).items():
+                if k in features:
+                    print(
+                        f"[warning] NEREnsembleContextRetriver: incompatible global feature '{k}' between multiple retrievers.",
+                        file=sys.stderr,
+                    )
+                features[k] = v
+        return features
+    def retrieve(
+        self, element_i: int, elements: List[List[str]], **kwargs
+    ) -> List[NERContextRetrievalMatch]:
+        all_matchs = set()
+        for retriever in self.retrievers:
+            matchs = retriever.retrieve(element_i, elements, **kwargs)
+            all_matchs = all_matchs.union(matchs)
+        if all(not m.score is None for m in all_matchs):
+            return sorted(all_matchs, key=lambda m: -m.score)[: self.k]  # type: ignore
+        return random.choices(list(all_matchs), k=self.k)

renard/pipeline/tokenization.py CHANGED Viewed

@@ -37,6 +37,7 @@ class NLTKTokenizer(PipelineStep):
     def __init__(self):
         nltk.download("punkt", quiet=True)
+        nltk.download("punkt_tab", quiet=True)
         self.word_tokenizer = None
         self.sent_tokenizer = None
         super().__init__()

renard/plot_utils.py CHANGED Viewed

@@ -25,6 +25,7 @@ def plot_nx_graph_reasonably(
     node_kwargs: Optional[Dict[str, Any]] = None,
     edge_kwargs: Optional[Dict[str, Any]] = None,
     label_kwargs: Optional[Dict[str, Any]] = None,
+    legend: bool = False,
 ):
     """Try to plot a :class:`nx.Graph` with 'reasonable' parameters
@@ -35,6 +36,7 @@ def plot_nx_graph_reasonably(
     :param node_kwargs: passed to :func:`nx.draw_networkx_nodes`
     :param edge_kwargs: passed to :func:`nx.draw_networkx_nodes`
     :param label_kwargs: passed to :func:`nx.draw_networkx_labels`
+    :param legend: if ``True``, will try to plot an additional legend.
     """
     pos = layout
     if pos is None:
@@ -48,7 +50,12 @@ def plot_nx_graph_reasonably(
     node_kwargs["node_size"] = node_kwargs.get(
         "node_size", [1 + degree * 10 for _, degree in G.degree]
     )
-    nx.draw_networkx_nodes(G, pos, ax=ax, **node_kwargs)
+    scatter = nx.draw_networkx_nodes(G, pos, ax=ax, **node_kwargs)
+    if legend:
+        if ax:
+            ax.legend(*scatter.legend_elements("sizes"))
+        else:
+            plt.legend(*scatter.legend_elements("sizes"))
     edge_kwargs = edge_kwargs or {}
     edges_attrs = graph_edges_attributes(G)
@@ -64,11 +71,11 @@ def plot_nx_graph_reasonably(
         edge_kwargs["edge_cmap"] = None
     else:
         edge_kwargs["edge_color"] = edge_kwargs.get(
-            "edge_color", [math.log(d["weight"]) for *_, d in G.edges.data()]
+            "edge_color", [math.log(d.get("weight", 1)) for *_, d in G.edges.data()]
         )
         edge_kwargs["edge_cmap"] = edge_kwargs.get("edge_cmap", plt.get_cmap("viridis"))
     edge_kwargs["width"] = edge_kwargs.get(
-        "width", [1 + math.log(d["weight"]) for _, _, d in G.edges.data()]
+        "width", [1 + math.log(d.get("weight", 1)) for _, _, d in G.edges.data()]
     )
     edge_kwargs["alpha"] = edge_kwargs.get("alpha", 0.35)
     nx.draw_networkx_edges(G, pos, ax=ax, **edge_kwargs)

renard/resources/determiners/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from renard.resources.determiners.determiners import *

renard/resources/determiners/determiners.py ADDED Viewed

@@ -0,0 +1,41 @@
+singular_determiners = {  # maps a language code to a set of lowercase determiner strings
+    "eng": {
+        "a",
+        "some",
+        "the",
+        "his",
+        "her",
+        "my",
+        "their",
+        "this",
+        "that",
+        "its",
+        "our",
+        "your",
+        "such",
+    },
+    "fra": {
+        "le",
+        "la",
+        "les",  # NOTE(review): "les" is a plural article in French — confirm it belongs in this set
+        "un",
+        "une",
+        "du",
+        "de",
+        "de la",
+        "ce",
+        "cette",
+        "mon",
+        "ma",
+        "ton",
+        "ta",
+        "son",
+        "sa",
+        "notre",
+        "votre",
+        "leur",
+        "au",
+        "à",
+        "l '",  # presumably an elided article as split by the tokenizer — TODO confirm
+    },
+}

{renard_pipeline-0.5.0.dist-info → renard_pipeline-0.6.1.dist-info}/METADATA RENAMED Viewed

@@ -1,37 +1,38 @@
 Metadata-Version: 2.1
 Name: renard-pipeline
-Version: 0.5.0
+Version: 0.6.1
 Summary: Relationships Extraction from NARrative Documents
 Home-page: https://github.com/CompNet/Renard
 License: GPL-3.0-only
 Author: Arthur Amalvy
 Author-email: arthur.amalvy@univ-avignon.fr
-Requires-Python: >=3.8,<3.11
+Requires-Python: >=3.8,<3.12
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
 Provides-Extra: spacy
 Provides-Extra: stanza
-Requires-Dist: coreferee (>=1.4.0,<2.0.0) ; extra == "spacy"
-Requires-Dist: datasets (>=2.16.1,<3.0.0)
-Requires-Dist: grimbert (>=0.1.0,<0.2.0)
-Requires-Dist: matplotlib (>=3.5.3,<4.0.0)
-Requires-Dist: more-itertools (>=10.1.0,<11.0.0)
-Requires-Dist: nameparser (>=1.1.0,<2.0.0)
-Requires-Dist: networkx (>=2.6.3,<3.0.0)
-Requires-Dist: nltk (>=3.6.5,<4.0.0)
-Requires-Dist: pandas (>=2.0.0,<3.0.0)
-Requires-Dist: pytest (>=7.2.1,<8.0.0)
-Requires-Dist: seqeval (==1.2.2)
-Requires-Dist: spacy (>=3.5.0,<4.0.0) ; extra == "spacy"
-Requires-Dist: spacy-transformers (>=1.2.1,<2.0.0) ; extra == "spacy"
-Requires-Dist: stanza (>=1.3.0,<2.0.0) ; extra == "stanza"
-Requires-Dist: tibert (>=0.4.0,<0.5.0)
+Requires-Dist: coreferee (>=1.4,<2.0) ; extra == "spacy"
+Requires-Dist: datasets (>=3.0,<4.0)
+Requires-Dist: grimbert (>=0.1,<0.2)
+Requires-Dist: matplotlib (>=3.5,<4.0)
+Requires-Dist: more-itertools (>=10.5,<11.0)
+Requires-Dist: nameparser (>=1.1,<2.0)
+Requires-Dist: networkx (>=3.0,<4.0)
+Requires-Dist: nltk (>=3.9,<4.0)
+Requires-Dist: pandas (>=2.0,<3.0)
+Requires-Dist: pytest (>=8.3.0,<9.0.0)
+Requires-Dist: rank-bm25 (>=0.2.2,<0.3.0)
+Requires-Dist: spacy (>=3.5,<4.0) ; extra == "spacy"
+Requires-Dist: spacy-transformers (>=1.3,<2.0) ; extra == "spacy"
+Requires-Dist: stanza (>=1.3,<2.0) ; extra == "stanza"
+Requires-Dist: tibert (>=0.5,<0.6)
 Requires-Dist: torch (>=2.0.0,!=2.0.1)
 Requires-Dist: tqdm (>=4.62.3,<5.0.0)
-Requires-Dist: transformers (>=4.36.0,<5.0.0)
+Requires-Dist: transformers (>=4.36,<5.0)
 Project-URL: Documentation, https://compnet.github.io/Renard/
 Project-URL: Repository, https://github.com/CompNet/Renard
 Description-Content-Type: text/markdown
@@ -40,7 +41,7 @@ Description-Content-Type: text/markdown
 [![DOI](https://joss.theoj.org/papers/10.21105/joss.06574/status.svg)](https://doi.org/10.21105/joss.06574)
-Renard (Relationships Extraction from NARrative Documents) is a library for creating and using custom character networks extraction pipelines. Renard can extract dynamic as well as static character networks.
+Renard (Relationship Extraction from NARrative Documents) is a library for creating and using custom character networks extraction pipelines. Renard can extract dynamic as well as static character networks.
 ![The Renard logo](./docs/renard.svg)
@@ -51,7 +52,7 @@ You can install the latest version using pip:
 > pip install renard-pipeline
-Currently, Renard supports Python 3.8, 3.9 and 3.10.
+Currently, Renard supports Python>=3.8,<=3.11
 # Documentation

{renard_pipeline-0.5.0.dist-info → renard_pipeline-0.6.1.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,17 @@
 renard/gender.py,sha256=HDtJQKOqIkV8F-Mxva95XFXWJoKRKckQ3fc93OBM6sw,102
 renard/graph_utils.py,sha256=EV0_56KtI3VOElCu7wxd2kL8QVPsOu7itE6wGJAJsNA,6073
-renard/ner_utils.py,sha256=dfcPzoONjMXAnD1pfWkDF3oHPtitu71PJvvtnFKwg1A,11425
+renard/ner_utils.py,sha256=SFZoyJM6c2avE7-NDkCSzkx-O8ppzS00a8EyHt64iGI,11628
 renard/nltk_utils.py,sha256=mUJiwMrEDZV4Fla7WuMR-hA_OC2ZIwSXgW_0Ew18VSo,977
 renard/pipeline/__init__.py,sha256=8Yim2mmny8YGvM7N5-na5zK-C9UDxUb77K9ml-VirUA,35
-renard/pipeline/character_unification.py,sha256=VntpU9FCLERUx_-FTirIOw8qwFRnVsUfrbHlBMCv1AU,15694
+renard/pipeline/character_unification.py,sha256=SsMaBHfGgRAvZyYbVcm6pxnIqHqD_JyQndGvwSjsGCc,17074
 renard/pipeline/characters_extraction.py,sha256=bMic8dtlYKUmAlTzQqDPraYy5VsGWoGkho35mA8w3_Y,396
-renard/pipeline/core.py,sha256=bBB3sXhTyS1ygYGJxQaA7TYjCJVZRVvqZ9S3_UDIyV8,26941
+renard/pipeline/core.py,sha256=LILUIQZp9f3FzqjBocUS7dKzX7lHQQVdL29jyqU1UeY,27754
 renard/pipeline/corefs/__init__.py,sha256=9c9AaXBcRrDBf1jhTtJ7DyjOJhX_Zej3FjlcGak7MK8,44
-renard/pipeline/corefs/corefs.py,sha256=CpcY7cy9vvCR-xw2KrCu1IsnZjb0GyxX44MpeaYGX2Q,11415
+renard/pipeline/corefs/corefs.py,sha256=d47Sd8ekwhQQV6rQ0F9QyAX2GOTqUnkDUA-eKgMtMS4,11417
 renard/pipeline/graph_extraction.py,sha256=Ga3wfUW9tDtatcTv2taLrNky9jz2wUwZ8uzoXJoSVk8,22928
-renard/pipeline/ner.py,sha256=VQ4D-S8bcBu49VMFRu0fxQRoaLBB7VGTyKTI5vJYtEY,11271
+renard/pipeline/ner/__init__.py,sha256=Dqxcf_EKhK1UwiCscZ3gGHInlcxJyvpR4o-ZCLEyV48,38
+renard/pipeline/ner/ner.py,sha256=8zUtaqaGNirfGFRyMpDzdqtO3abrRLyLtjmwnqBNwUI,9893
+renard/pipeline/ner/retrieval.py,sha256=JIU3fi0Q1gl_YGP6kYx6zC9xz4UN6gnqdVuzWVXzzyM,12853
 renard/pipeline/preconfigured.py,sha256=j4-0OUZrmtC8rQfwGWEAAGNxc8-4hlY7N823Uami5lk,5392
 renard/pipeline/preprocessing.py,sha256=OsdsYzmRweAiQV_CtP7uiz--OGogZtQlsdR8XX5DCk0,952
 renard/pipeline/progress.py,sha256=PJ174ssaqr5qHaTrVQ8HqJtvpvX6QhtHM5PHT893_Xk,2689
@@ -17,9 +19,11 @@ renard/pipeline/quote_detection.py,sha256=FyldJhynIT843fB7rwVtHmDZJqTKkjGml6qTLj
 renard/pipeline/sentiment_analysis.py,sha256=76MPin4L1-vSswJe5yGrbCSSDim1LYxSEgNj_BdQDvk,1464
 renard/pipeline/speaker_attribution.py,sha256=Uts6JdUo_sbWyIb2AJ6SO5JuUbgROIpcbUNTg4dHo4U,4329
 renard/pipeline/stanford_corenlp.py,sha256=14b6Ee6oPz1EL-bNRT688aNxVTk_Jwa_vJ20FiBODC4,8189
-renard/pipeline/tokenization.py,sha256=BzLBG_QndbLLf2VtZtkIsFSbB0whvgrI4_hzVw_jxZY,2910
-renard/plot_utils.py,sha256=Xqga28tf1pAbAfsYE4fj87SKrs-l7-BwwUriIcTbEGA,3064
+renard/pipeline/tokenization.py,sha256=gZP0ZpAa0rhtUDPk6W0PiXRxmiC3IcSyRF_E7KaP19A,2957
+renard/plot_utils.py,sha256=qsQI-wbk_5KCXDvt1tPerq4UW4VWLrJpoCet4qkONwE,3344
 renard/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+renard/resources/determiners/__init__.py,sha256=dAcx2hWb_aAd5Rv9rif7CQOvjKcSdIY_mCXJBQQtw60,55
+renard/resources/determiners/determiners.py,sha256=lQ5XGmKWK8h6dcBp0tB2TcEJbkQ9KCHkACJ_gqWjexU,594
 renard/resources/hypocorisms/__init__.py,sha256=vlsY9PqxQCIpijxm79Y0KYh2c0S4S1pgrC9w-AUQGvE,55
 renard/resources/hypocorisms/datas/License.txt,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
 renard/resources/hypocorisms/datas/hypocorisms.csv,sha256=CKTo7A5i14NzN6JRBz7U2NJnxrEo8VOlmmdhzEZnqlI,21470
@@ -29,7 +33,7 @@ renard/resources/pronouns/pronouns.py,sha256=YJ8hM6H8QHrF2Xx6O5blqc-Sqe1D1YFL0sR
 renard/resources/titles/__init__.py,sha256=Jcg4B7stsWiAaXbFgNl_L3ICtCQmFe9bo3YjdkVL50w,45
 renard/resources/titles/titles.py,sha256=GsFccVJuTkgDWiAqWZpFd2R9pGvFKQZBOk4RWWuWDkw,968
 renard/utils.py,sha256=WL6djr3iu5Kzo2Jq6qDllHXgvZcEnmqBxPkQf1drq7c,4072
-renard_pipeline-0.5.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-renard_pipeline-0.5.0.dist-info/METADATA,sha256=fX4hE68M-gnnpocVqV2FqvfypIsv4gNl3Usz3n5kc7Q,4379
-renard_pipeline-0.5.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-renard_pipeline-0.5.0.dist-info/RECORD,,
+renard_pipeline-0.6.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+renard_pipeline-0.6.1.dist-info/METADATA,sha256=vijGA3DMBq0Tkn2SJxMKacOw8zI5Z4IDSmIBWBuMEuM,4374
+renard_pipeline-0.6.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+renard_pipeline-0.6.1.dist-info/RECORD,,

{renard_pipeline-0.5.0.dist-info → renard_pipeline-0.6.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{renard_pipeline-0.5.0.dist-info → renard_pipeline-0.6.1.dist-info}/WHEEL RENAMED Viewed

File without changes

renard-pipeline 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

Potentially problematic release.

renard-pipeline 0.5.0py3-none-any.whl → 0.6.1py3-none-any.whl