renard-pipeline 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of renard-pipeline as potentially problematic; see the registry page for details.
- renard/graph_utils.py +11 -4
- renard/ner_utils.py +4 -0
- renard/pipeline/character_unification.py +26 -6
- renard/pipeline/characters_extraction.py +3 -1
- renard/pipeline/core.py +121 -26
- renard/pipeline/corefs/corefs.py +30 -31
- renard/pipeline/graph_extraction.py +281 -192
- renard/pipeline/ner.py +3 -2
- renard/pipeline/progress.py +32 -1
- renard/pipeline/speaker_attribution.py +2 -3
- renard/pipeline/tokenization.py +59 -30
- renard/plot_utils.py +41 -28
- renard/resources/hypocorisms/hypocorisms.py +3 -2
- renard/utils.py +57 -1
- {renard_pipeline-0.4.1.dist-info → renard_pipeline-0.5.0.dist-info}/METADATA +27 -3
- {renard_pipeline-0.4.1.dist-info → renard_pipeline-0.5.0.dist-info}/RECORD +18 -18
- {renard_pipeline-0.4.1.dist-info → renard_pipeline-0.5.0.dist-info}/WHEEL +1 -1
- {renard_pipeline-0.4.1.dist-info → renard_pipeline-0.5.0.dist-info}/LICENSE +0 -0
renard/pipeline/ner.py
CHANGED

@@ -16,6 +16,7 @@ from renard.ner_utils import ner_entities
 if TYPE_CHECKING:
     from transformers.tokenization_utils_base import BatchEncoding
     from transformers import PreTrainedModel, PreTrainedTokenizerFast
+    from renard.pipeline.core import Pipeline


 @dataclass

@@ -214,10 +215,10 @@ class BertNamedEntityRecognizer(PipelineStep):

         super().__init__()

-    def _pipeline_init_(self, lang: str,
+    def _pipeline_init_(self, lang: str, **kwargs):
         from transformers import AutoModelForTokenClassification, AutoTokenizer  # type: ignore

-        super()._pipeline_init_(lang,
+        super()._pipeline_init_(lang, **kwargs)

         # init model if needed (this happens if the user did not pass
         # the instance of a model)
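The `**kwargs` change above matters to custom steps: `_pipeline_init_` overrides should now accept and forward arbitrary keyword arguments so pipeline-level parameters keep flowing to the base class. A minimal sketch (`MyStep` is illustrative, not part of the package):

```python
from renard.pipeline.core import PipelineStep


class MyStep(PipelineStep):
    def _pipeline_init_(self, lang: str, **kwargs):
        # Forward all pipeline-level configuration instead of naming each
        # parameter, so new pipeline parameters do not break this override.
        super()._pipeline_init_(lang, **kwargs)
```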
renard/pipeline/progress.py
CHANGED

@@ -1,4 +1,6 @@
+from __future__ import annotations
 from typing import Iterable, Literal, Optional, TypeVar, Generator
+import sys
 from tqdm import tqdm


@@ -20,6 +22,10 @@ class ProgressReporter:
         """Update reporter current message."""
         pass

+    def get_subreporter(self) -> ProgressReporter:
+        """Get the subreporter corresponding to that reporter."""
+        raise NotImplementedError
+

 class NoopProgressReporter(ProgressReporter):
     def reset_(self):

@@ -28,6 +34,28 @@ class NoopProgressReporter(ProgressReporter):
     def update_progress_(self, added_progress: int):
         pass

+    def get_subreporter(self) -> ProgressReporter:
+        return NoopProgressReporter()
+
+
+class TQDMSubProgressReporter(ProgressReporter):
+    def __init__(self, reporter: TQDMProgressReporter) -> None:
+        super().__init__()
+        self.reporter = reporter
+
+    def start_(self, total: int):
+        super().start_(total)
+        self.progress = 0
+
+    def update_progress_(self, added_progress: int):
+        self.progress += added_progress
+        self.reporter.tqdm.set_postfix(step=f"({self.progress}/{self.total})")
+
+    def update_message_(self, message: str):
+        self.reporter.tqdm.set_postfix(
+            step=f"({self.progress}/{self.total})", message=message
+        )
+

 class TQDMProgressReporter(ProgressReporter):
     def start_(self, total: int):

@@ -40,6 +68,9 @@ class TQDMProgressReporter(ProgressReporter):
     def update_message_(self, message: str):
         self.tqdm.set_description_str(message)

+    def get_subreporter(self) -> ProgressReporter:
+        return TQDMSubProgressReporter(self)
+

 T = TypeVar("T")

@@ -62,5 +93,5 @@ def get_progress_reporter(name: Optional[Literal["tqdm"]]) -> ProgressReporter:
         return NoopProgressReporter()
     if name == "tqdm":
         return TQDMProgressReporter()
-    print(f"[warning] unknown progress reporter: {name}")
+    print(f"[warning] unknown progress reporter: {name}", file=sys.stderr)
     return NoopProgressReporter()
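The subreporter hook lets a step report fine-grained progress as a postfix on the main tqdm bar. A sketch of plausible usage, assuming the base `start_` records `total`; the call sequence is inferred from the methods shown above, not taken from the package docs:

```python
from renard.pipeline.progress import get_progress_reporter

reporter = get_progress_reporter("tqdm")
reporter.start_(3)  # e.g. a pipeline of three steps
sub = reporter.get_subreporter()  # TQDMSubProgressReporter wrapping the same bar
sub.start_(100)  # inner work of the current step
for _ in range(100):
    sub.update_progress_(1)  # shown as a postfix, e.g. step=(42/100)
```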
renard/pipeline/speaker_attribution.py
CHANGED

@@ -49,13 +49,12 @@ class BertSpeakerDetector(PipelineStep):

         super().__init__()

-    def _pipeline_init_(self, lang: str,
+    def _pipeline_init_(self, lang: str, **kwargs):
         from transformers import AutoTokenizer

-        super()._pipeline_init_(lang,
+        super()._pipeline_init_(lang, **kwargs)

         if self.model is None:
-
             # the user supplied a huggingface ID: load model from the HUB
             if not self.huggingface_model_id is None:
                 self.model = SpeakerAttributionModel.from_pretrained(
renard/pipeline/tokenization.py
CHANGED

@@ -1,49 +1,78 @@
-from typing import Dict, Any,
-import
-import torch
+from typing import Dict, Any, Set, Union, Literal, List, Tuple
+from more_itertools import windowed
 import nltk
-from
-from
+from nltk.data import load
+from nltk.tokenize.destructive import NLTKWordTokenizer
 from renard.pipeline.core import PipelineStep
-from renard.pipeline.progress import ProgressReporter
 from renard.nltk_utils import NLTK_ISO_STRING_TO_LANG


+def make_char2token(text: str, token2chars: List[Tuple[int, int]]) -> List[int]:
+    if len(token2chars) == 0:
+        return []
+
+    c2t = [None] * len(text)
+    for token_i, chars in enumerate(token2chars):
+        for char_i in range(*chars):
+            c2t[char_i] = token_i  # type: ignore
+
+    for char_i in range(0, token2chars[0][0]):
+        c2t[char_i] = 0  # type: ignore
+    for chars1, chars2 in windowed(token2chars, 2):
+        if chars1 is None or chars2 is None:
+            continue
+        end1 = chars1[1]
+        start2 = chars2[0]
+        for char_i in range(end1, start2):
+            c2t[char_i] = c2t[end1 - 1]
+    for char_i in range(token2chars[-1][1], len(c2t)):
+        c2t[char_i] = token2chars[-1][1]  # type: ignore
+
+    assert all([not i is None for i in c2t])
+    return c2t  # type: ignore
+
+
 class NLTKTokenizer(PipelineStep):
-    """
+    """A NLTK-based tokenizer"""

     def __init__(self):
         nltk.download("punkt", quiet=True)
+        self.word_tokenizer = None
+        self.sent_tokenizer = None
         super().__init__()

-    def
-
-
-
-
-
-
-
-
-
-
-
-            ),
-            "chapter_tokens": [d["tokens"] for d in out_dicts],
-        }
-
-        sentences = nltk.sent_tokenize(
-            text, language=NLTK_ISO_STRING_TO_LANG[self.lang]
-        )
+    def _pipeline_init_(self, lang: str, **kwargs):
+        assert lang in NLTK_ISO_STRING_TO_LANG
+        nltk_lang = NLTK_ISO_STRING_TO_LANG[lang]
+        self.word_tokenizer = NLTKWordTokenizer()
+        self.sent_tokenizer = load(f"tokenizers/punkt/{nltk_lang}.pickle")
+        super()._pipeline_init_(lang, **kwargs)
+
+    def __call__(self, text: str, **kwargs) -> Dict[str, Any]:
+        assert not self.word_tokenizer is None
+        assert not self.sent_tokenizer is None
+
+        sent_indices = self.sent_tokenizer.span_tokenize(text)

         tokens = []
+        token2chars = []
         tokenized_sentences = []
-        for
-
+        for sent_start, sent_end in sent_indices:
+            sent = text[sent_start:sent_end]
+            sent_tokens_indices = list(self.word_tokenizer.span_tokenize(sent))
+            token2chars += [
+                (start + sent_start, end + sent_start)
+                for start, end in sent_tokens_indices
+            ]
+            sent_tokens = [sent[start:end] for start, end in sent_tokens_indices]
             tokenized_sentences.append(sent_tokens)
             tokens += sent_tokens

-        return {
+        return {
+            "tokens": tokens,
+            "char2token": make_char2token(text, token2chars),
+            "sentences": tokenized_sentences,
+        }

     def supported_langs(self) -> Union[Set[str], Literal["any"]]:
         return set(NLTK_ISO_STRING_TO_LANG.keys())

@@ -52,4 +81,4 @@ class NLTKTokenizer(PipelineStep):
         return {"text"}

     def production(self) -> Set[str]:
-        return {"tokens", "
+        return {"tokens", "char2token", "sentences"}
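The new `char2token` production maps every character offset of the input text to the index of the token covering it; characters between tokens are attached to the preceding token. A small sketch calling `make_char2token` directly with hand-written spans:

```python
from renard.pipeline.tokenization import make_char2token

text = "Mr. Darcy smiled."
# (start, end) character span of each token: "Mr.", "Darcy", "smiled", "."
token2chars = [(0, 3), (4, 9), (10, 16), (16, 17)]
char2token = make_char2token(text, token2chars)
assert char2token[5] == 1  # character 5 ('a') lies inside token 1, "Darcy"
assert char2token[3] == 0  # the space after "Mr." maps to the preceding token
```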
renard/plot_utils.py
CHANGED

@@ -15,53 +15,66 @@ CharactersGraphLayout = Union[


 def layout_nx_graph_reasonably(G: nx.Graph) -> Dict[Any, np.ndarray]:
-    return nx.spring_layout(G, k=
+    return nx.spring_layout(G, k=min(1.5, 8 / math.sqrt(len(G.nodes))))  # type: ignore


-def plot_nx_graph_reasonably(
+def plot_nx_graph_reasonably(
+    G: nx.Graph,
+    ax=None,
+    layout: Optional[dict] = None,
+    node_kwargs: Optional[Dict[str, Any]] = None,
+    edge_kwargs: Optional[Dict[str, Any]] = None,
+    label_kwargs: Optional[Dict[str, Any]] = None,
+):
     """Try to plot a :class:`nx.Graph` with 'reasonable' parameters

     :param G: the graph to draw
     :param ax: matplotlib axes
     :param layout: if given, this graph layout will be applied.
         Otherwise, use :func:`layout_nx_graph_reasonably`.
+    :param node_kwargs: passed to :func:`nx.draw_networkx_nodes`
+    :param edge_kwargs: passed to :func:`nx.draw_networkx_nodes`
+    :param label_kwargs: passed to :func:`nx.draw_networkx_labels`
     """
     pos = layout
     if pos is None:
         pos = layout_nx_graph_reasonably(G)

-
-
-
-        node_color=[degree for _, degree in G.degree],  # type: ignore
-        cmap=plt.get_cmap("winter_r"),
-        node_size=[1 + degree * 10 for _, degree in G.degree],  # type: ignore
-        ax=ax,
+    node_kwargs = node_kwargs or {}
+    node_kwargs["node_color"] = node_kwargs.get(
+        "node_color", [degree for _, degree in G.degree]
     )
+    node_kwargs["cmap"] = node_kwargs.get("cmap", "viridis")
+    node_kwargs["node_size"] = node_kwargs.get(
+        "node_size", [1 + degree * 10 for _, degree in G.degree]
+    )
+    nx.draw_networkx_nodes(G, pos, ax=ax, **node_kwargs)

+    edge_kwargs = edge_kwargs or {}
     edges_attrs = graph_edges_attributes(G)
-    if
+    if (
+        not "edge_color" in edge_kwargs
+        and not "edge_cmap" in edge_kwargs
+        and "polarity" in edges_attrs
+    ):
         # we draw the polarity of interactions if the 'polarity'
         # attribute is present in the graph
         polarities = [d.get("polarity", 0) for *_, d in G.edges.data()]  # type: ignore
-        edge_color = ["g" if p > 0 else "r" for p in polarities]
-        edge_cmap = None
-
+        edge_kwargs["edge_color"] = ["g" if p > 0 else "r" for p in polarities]
+        edge_kwargs["edge_cmap"] = None
     else:
-
-
-
-
-
-        edge_cmap=edge_cmap,
-        edge_vmax=1,
-        edge_vmin=-1,
-        width=[1 + math.log(d["weight"]) for _, _, d in G.edges.data()],  # type: ignore
-        alpha=0.35,
-        ax=ax,
+        edge_kwargs["edge_color"] = edge_kwargs.get(
+            "edge_color", [math.log(d["weight"]) for *_, d in G.edges.data()]
+        )
+        edge_kwargs["edge_cmap"] = edge_kwargs.get("edge_cmap", plt.get_cmap("viridis"))
+        edge_kwargs["width"] = edge_kwargs.get(
+            "width", [1 + math.log(d["weight"]) for _, _, d in G.edges.data()]
         )
+    edge_kwargs["alpha"] = edge_kwargs.get("alpha", 0.35)
+    nx.draw_networkx_edges(G, pos, ax=ax, **edge_kwargs)

-
-
-    )
+    label_kwargs = label_kwargs or {}
+    label_kwargs["verticalalignment"] = label_kwargs.get("verticalalignment", "top")
+    label_kwargs["font_size"] = label_kwargs.get("font_size", 8)
+    label_kwargs["alpha"] = label_kwargs.get("alpha", 0.75)
+    nx.draw_networkx_labels(G, pos=pos, ax=ax, **label_kwargs)
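With the reworked signature, callers can override a single drawing default without reimplementing the function, since defaults are only applied for keys left unset. A usage sketch on a toy graph (edge `weight` attributes feed the default edge widths):

```python
import matplotlib.pyplot as plt
import networkx as nx
from renard.plot_utils import plot_nx_graph_reasonably

G = nx.Graph()
G.add_edge("Alice", "Bob", weight=3)
G.add_edge("Bob", "Carol", weight=1)

fig, ax = plt.subplots()
# Only the node colormap is overridden; all other defaults still apply.
plot_nx_graph_reasonably(G, ax=ax, node_kwargs={"cmap": "plasma"})
plt.show()
```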
renard/resources/hypocorisms/hypocorisms.py
CHANGED

@@ -1,6 +1,6 @@
 from typing import Dict, List, Set, Tuple
 from collections import defaultdict
-import os
+import os, sys

 script_dir = os.path.dirname(os.path.abspath(__file__))


@@ -24,7 +24,8 @@ class HypocorismGazetteer:
         """
         if not lang in HypocorismGazetteer.supported_langs:
             print(
-                f"[warning] {lang} not supported by {type(self)} (supported languages: {HypocorismGazetteer.supported_langs})"
+                f"[warning] {lang} not supported by {type(self)} (supported languages: {HypocorismGazetteer.supported_langs})",
+                file=sys.stderr,
             )

         self.name_to_nicknames = defaultdict(set)
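This release moves several warnings (here, and in `progress.py` and `utils.py`) from stdout to stderr, so callers can capture or silence them without touching normal output. A generic illustration of the pattern using only the standard library:

```python
import contextlib
import io
import sys

buf = io.StringIO()
with contextlib.redirect_stderr(buf):
    # stands in for any renard call that emits a "[warning] ..." message
    print("[warning] example warning", file=sys.stderr)
assert "[warning]" in buf.getvalue()
```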
renard/utils.py
CHANGED

@@ -1,4 +1,5 @@
-from typing import List, Tuple, TypeVar, Collection, Iterable, cast
+from typing import List, Literal, Tuple, TypeVar, Collection, Iterable, cast, Union
+import sys
 from more_itertools.more import windowed
 import torch

@@ -76,3 +77,58 @@ def search_pattern(seq: Iterable[R], pattern: List[R]) -> List[int]:
         if list(subseq) == pattern:
             start_indices.append(subseq_i)
     return start_indices
+
+
+#: A `BlockBounds` delimits blocks in either raw text ("characters") or
+#: tokenized text ("tokens"). It has the following form:
+#:
+#: ([(block start, block end), ...], unit)
+#:
+#: see :func:`block_indices` to easily create `BlockBounds`
+BlockBounds = Tuple[List[Tuple[int, int]], Literal["characters", "tokens"]]
+
+
+def block_bounds(blocks: Union[List[str], List[List[str]]]) -> BlockBounds:
+    """Return the boundaries of a series of blocks.
+
+    :param blocks: either a list of raw texts or a list of tokenized
+        texts.
+
+    :return: A `BlockBounds` with the correct unit.
+    """
+    if len(blocks) == 0:
+        print("[warning] computing block bounds on 0 blocks.", file=sys.stderr)
+        return ([], ("characters"))
+
+    if isinstance(blocks[0], str):
+        unit = "characters"
+    elif isinstance(blocks[0], list):
+        unit = "tokens"
+    else:
+        raise ValueError(blocks)
+
+    indices = []
+    start = 0
+    for block in blocks:
+        end = start + len(block)
+        indices.append((start, end))
+        start = end
+
+    return (indices, unit)
+
+
+def charbb2tokenbb(char_bb: BlockBounds, char2token: List[int]) -> BlockBounds:
+    """Convert a `BlockBounds` in characters to a `BlockBounds` in
+    tokens.
+
+    :param char_bb: block bounds, in 'characters'.
+    :param char2token: a list with ``char2token[i]`` being the index
+        of token corresponding to character ``i``.
+
+    :return: a `BlockBounds`, in 'tokens'.
+    """
+    assert char_bb[1] == "characters"
+    tokens_blocks = []
+    for char_block_start, char_block_end in char_bb[0]:
+        tokens_blocks.append((char2token[char_block_start], char2token[char_block_end]))
+    return (tokens_blocks, "tokens")
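A sketch of the new block utilities; the chapter strings are illustrative:

```python
from renard.utils import block_bounds, charbb2tokenbb

chapters = ["Chapter I. It was...", "Chapter II. Later..."]  # 20 characters each
char_bb = block_bounds(chapters)
assert char_bb == ([(0, 20), (20, 40)], "characters")

# With the char2token list produced by the tokenization step, the same
# bounds convert to token units:
# token_bb = charbb2tokenbb(char_bb, char2token)
```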
{renard_pipeline-0.4.1.dist-info → renard_pipeline-0.5.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: renard-pipeline
-Version: 0.4.1
+Version: 0.5.0
 Summary: Relationships Extraction from NARrative Documents
 Home-page: https://github.com/CompNet/Renard
 License: GPL-3.0-only

@@ -28,7 +28,7 @@ Requires-Dist: seqeval (==1.2.2)
 Requires-Dist: spacy (>=3.5.0,<4.0.0) ; extra == "spacy"
 Requires-Dist: spacy-transformers (>=1.2.1,<2.0.0) ; extra == "spacy"
 Requires-Dist: stanza (>=1.3.0,<2.0.0) ; extra == "stanza"
-Requires-Dist: tibert (>=0.
+Requires-Dist: tibert (>=0.4.0,<0.5.0)
 Requires-Dist: torch (>=2.0.0,!=2.0.1)
 Requires-Dist: tqdm (>=4.62.3,<5.0.0)
 Requires-Dist: transformers (>=4.36.0,<5.0.0)

@@ -38,9 +38,11 @@ Description-Content-Type: text/markdown

 # Renard

+[](https://doi.org/10.21105/joss.06574)
+
 Renard (Relationships Extraction from NARrative Documents) is a library for creating and using custom character networks extraction pipelines. Renard can extract dynamic as well as static character networks.

-
+


 # Installation

@@ -102,3 +104,25 @@ Expensive tests are disabled by default. These can be run by setting the environ

 see [the "Contributing" section of the documentation](https://compnet.github.io/Renard/contributing.html).

+
+# How to cite
+
+If you use Renard in your research project, please cite it as follows:
+
+```bibtex
+@Article{Amalvy2024,
+  doi = {10.21105/joss.06574},
+  year = {2024},
+  publisher = {The Open Journal},
+  volume = {9},
+  number = {98},
+  pages = {6574},
+  author = {Amalvy, A. and Labatut, V. and Dufour, R.},
+  title = {Renard: A Modular Pipeline for Extracting Character
+           Networks from Narrative Texts},
+  journal = {Journal of Open Source Software},
+}
+```
+
+We would be happy to hear about your usage of Renard, so don't hesitate to reach out!
+
{renard_pipeline-0.4.1.dist-info → renard_pipeline-0.5.0.dist-info}/RECORD
CHANGED

@@ -1,35 +1,35 @@
 renard/gender.py,sha256=HDtJQKOqIkV8F-Mxva95XFXWJoKRKckQ3fc93OBM6sw,102
-renard/graph_utils.py,sha256=
-renard/ner_utils.py,sha256=
+renard/graph_utils.py,sha256=EV0_56KtI3VOElCu7wxd2kL8QVPsOu7itE6wGJAJsNA,6073
+renard/ner_utils.py,sha256=dfcPzoONjMXAnD1pfWkDF3oHPtitu71PJvvtnFKwg1A,11425
 renard/nltk_utils.py,sha256=mUJiwMrEDZV4Fla7WuMR-hA_OC2ZIwSXgW_0Ew18VSo,977
 renard/pipeline/__init__.py,sha256=8Yim2mmny8YGvM7N5-na5zK-C9UDxUb77K9ml-VirUA,35
-renard/pipeline/character_unification.py,sha256=
-renard/pipeline/characters_extraction.py,sha256=
-renard/pipeline/core.py,sha256=
+renard/pipeline/character_unification.py,sha256=VntpU9FCLERUx_-FTirIOw8qwFRnVsUfrbHlBMCv1AU,15694
+renard/pipeline/characters_extraction.py,sha256=bMic8dtlYKUmAlTzQqDPraYy5VsGWoGkho35mA8w3_Y,396
+renard/pipeline/core.py,sha256=bBB3sXhTyS1ygYGJxQaA7TYjCJVZRVvqZ9S3_UDIyV8,26941
 renard/pipeline/corefs/__init__.py,sha256=9c9AaXBcRrDBf1jhTtJ7DyjOJhX_Zej3FjlcGak7MK8,44
-renard/pipeline/corefs/corefs.py,sha256=
-renard/pipeline/graph_extraction.py,sha256=
-renard/pipeline/ner.py,sha256=
+renard/pipeline/corefs/corefs.py,sha256=CpcY7cy9vvCR-xw2KrCu1IsnZjb0GyxX44MpeaYGX2Q,11415
+renard/pipeline/graph_extraction.py,sha256=Ga3wfUW9tDtatcTv2taLrNky9jz2wUwZ8uzoXJoSVk8,22928
+renard/pipeline/ner.py,sha256=VQ4D-S8bcBu49VMFRu0fxQRoaLBB7VGTyKTI5vJYtEY,11271
 renard/pipeline/preconfigured.py,sha256=j4-0OUZrmtC8rQfwGWEAAGNxc8-4hlY7N823Uami5lk,5392
 renard/pipeline/preprocessing.py,sha256=OsdsYzmRweAiQV_CtP7uiz--OGogZtQlsdR8XX5DCk0,952
-renard/pipeline/progress.py,sha256=
+renard/pipeline/progress.py,sha256=PJ174ssaqr5qHaTrVQ8HqJtvpvX6QhtHM5PHT893_Xk,2689
 renard/pipeline/quote_detection.py,sha256=FyldJhynIT843fB7rwVtHmDZJqTKkjGml6qTLjsIhMA,2045
 renard/pipeline/sentiment_analysis.py,sha256=76MPin4L1-vSswJe5yGrbCSSDim1LYxSEgNj_BdQDvk,1464
-renard/pipeline/speaker_attribution.py,sha256=
+renard/pipeline/speaker_attribution.py,sha256=Uts6JdUo_sbWyIb2AJ6SO5JuUbgROIpcbUNTg4dHo4U,4329
 renard/pipeline/stanford_corenlp.py,sha256=14b6Ee6oPz1EL-bNRT688aNxVTk_Jwa_vJ20FiBODC4,8189
-renard/pipeline/tokenization.py,sha256=
-renard/plot_utils.py,sha256=
+renard/pipeline/tokenization.py,sha256=BzLBG_QndbLLf2VtZtkIsFSbB0whvgrI4_hzVw_jxZY,2910
+renard/plot_utils.py,sha256=Xqga28tf1pAbAfsYE4fj87SKrs-l7-BwwUriIcTbEGA,3064
 renard/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 renard/resources/hypocorisms/__init__.py,sha256=vlsY9PqxQCIpijxm79Y0KYh2c0S4S1pgrC9w-AUQGvE,55
 renard/resources/hypocorisms/datas/License.txt,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
 renard/resources/hypocorisms/datas/hypocorisms.csv,sha256=CKTo7A5i14NzN6JRBz7U2NJnxrEo8VOlmmdhzEZnqlI,21470
-renard/resources/hypocorisms/hypocorisms.py,sha256=
+renard/resources/hypocorisms/hypocorisms.py,sha256=rFFKKr-rEsd5wbz_SYjadrgKdEWwxMwVR1NQu_wcPqI,2887
 renard/resources/pronouns/__init__.py,sha256=62h0zuXp8kCToTLTyg8D8rJ-MXQpT8Vyc6mljcD1RGU,49
 renard/resources/pronouns/pronouns.py,sha256=YJ8hM6H8QHrF2Xx6O5blqc-Sqe1D1YFL0sRdqO_rroE,817
 renard/resources/titles/__init__.py,sha256=Jcg4B7stsWiAaXbFgNl_L3ICtCQmFe9bo3YjdkVL50w,45
 renard/resources/titles/titles.py,sha256=GsFccVJuTkgDWiAqWZpFd2R9pGvFKQZBOk4RWWuWDkw,968
-renard/utils.py,sha256=
-renard_pipeline-0.
-renard_pipeline-0.
-renard_pipeline-0.
-renard_pipeline-0.
+renard/utils.py,sha256=WL6djr3iu5Kzo2Jq6qDllHXgvZcEnmqBxPkQf1drq7c,4072
+renard_pipeline-0.5.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+renard_pipeline-0.5.0.dist-info/METADATA,sha256=fX4hE68M-gnnpocVqV2FqvfypIsv4gNl3Usz3n5kc7Q,4379
+renard_pipeline-0.5.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+renard_pipeline-0.5.0.dist-info/RECORD,,
{renard_pipeline-0.4.1.dist-info → renard_pipeline-0.5.0.dist-info}/LICENSE
File without changes